58 files changed, 3606 insertions, 1889 deletions
diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c
index 769f754b669..86d704e8d08 100644
--- a/src/backend/access/common/tupdesc.c
+++ b/src/backend/access/common/tupdesc.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.73 2001/03/22 06:16:06 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.74 2001/05/07 00:43:15 tgl Exp $
  *
  * NOTES
  *	  some of the executor utility code such as "ExecTypeFromTL" should be
@@ -237,16 +237,16 @@ equalTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2)
 		Form_pg_attribute attr2 = tupdesc2->attrs[i];
 
 		/*
-		 * We do not need to check every single field here, and in fact
-		 * some fields such as attdispersion probably shouldn't be
-		 * compared.  We can also disregard attnum (it was used to place
-		 * the row in the attrs array) and everything derived from the
-		 * column datatype.
+		 * We do not need to check every single field here: we can disregard
+		 * attrelid, attnum (it was used to place the row in the attrs array)
+		 * and everything derived from the column datatype.
 		 */
 		if (strcmp(NameStr(attr1->attname), NameStr(attr2->attname)) != 0)
 			return false;
 		if (attr1->atttypid != attr2->atttypid)
 			return false;
+		if (attr1->attstattarget != attr2->attstattarget)
+			return false;
 		if (attr1->atttypmod != attr2->atttypmod)
 			return false;
 		if (attr1->attstorage != attr2->attstorage)
@@ -365,12 +365,12 @@ TupleDescInitEntry(TupleDesc desc,
 	else
 		MemSet(NameStr(att->attname), 0, NAMEDATALEN);
 
-	att->attdispersion = 0;		/* dummy value */
+	att->attstattarget = 0;
 	att->attcacheoff = -1;
 	att->atttypmod = typmod;
 
 	att->attnum = attributeNumber;
-	att->attnelems = attdim;
+	att->attndims = attdim;
 	att->attisset = attisset;
 
 	att->attnotnull = false;
@@ -506,7 +506,7 @@ TupleDescMakeSelfReference(TupleDesc desc,
 	att->attbyval = true;
 	att->attalign = 'i';
 	att->attstorage = 'p';
-	att->attnelems = 0;
+	att->attndims = 0;
 }
 
 /* ----------------------------------------------------------------
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index 1c5577b88a0..06010896821 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -6,7 +6,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.72 2001/03/22 03:59:12 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.73 2001/05/07 00:43:15 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -84,8 +84,8 @@ static void gist_dumptree(Relation r, int level, BlockNumber blk, OffsetNumber c
 #endif
 
 /*
-** routine to build an index.  Basically calls insert over and over
-*/
+ * routine to build an index.  Basically calls insert over and over
+ */
 Datum
 gistbuild(PG_FUNCTION_ARGS)
 {
@@ -105,7 +105,7 @@ gistbuild(PG_FUNCTION_ARGS)
 				itupdesc;
 	Datum		attdata[INDEX_MAX_KEYS];
 	char		nulls[INDEX_MAX_KEYS];
-	int			nhtups,
+	double		nhtups,
 				nitups;
 	Node	   *pred = indexInfo->ii_Predicate;
 
@@ -172,7 +172,7 @@ gistbuild(PG_FUNCTION_ARGS)
 #endif	 /* OMIT_PARTIAL_INDEX */
 
 	/* build the index */
-	nhtups = nitups = 0;
+	nhtups = nitups = 0.0;
 
 	compvec = (bool *) palloc(sizeof(bool) * indexInfo->ii_NumIndexAttrs);
 
@@ -183,7 +183,7 @@ gistbuild(PG_FUNCTION_ARGS)
 	{
 		MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-		nhtups++;
+		nhtups += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -196,7 +196,7 @@ gistbuild(PG_FUNCTION_ARGS)
 			slot->val = htup;
 			if (ExecQual((List *) oldPred, econtext, false))
 			{
-				nitups++;
+				nitups += 1.0;
 				continue;
 			}
 		}
@@ -213,7 +213,7 @@ gistbuild(PG_FUNCTION_ARGS)
 		}
 #endif	 /* OMIT_PARTIAL_INDEX */
 
-		nitups++;
+		nitups += 1.0;
 
 		/*
 		 * For the current heap tuple, extract all the attributes we use
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index aa76ba232a0..9617fcc33a6 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.50 2001/03/22 03:59:12 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.51 2001/05/07 00:43:15 tgl Exp $
  *
  * NOTES
  *	  This file contains only the public interface routines.
@@ -57,7 +57,7 @@ hashbuild(PG_FUNCTION_ARGS)
 				itupdesc;
 	Datum		attdata[INDEX_MAX_KEYS];
 	char		nulls[INDEX_MAX_KEYS];
-	int			nhtups,
+	double		nhtups,
 				nitups;
 	HashItem	hitem;
 	Node	   *pred = indexInfo->ii_Predicate;
@@ -109,7 +109,7 @@ hashbuild(PG_FUNCTION_ARGS)
 #endif	 /* OMIT_PARTIAL_INDEX */
 
 	/* build the index */
-	nhtups = nitups = 0;
+	nhtups = nitups = 0.0;
 
 	/* start a heap scan */
 	hscan = heap_beginscan(heap, 0, SnapshotNow, 0, (ScanKey) NULL);
@@ -118,7 +118,7 @@ hashbuild(PG_FUNCTION_ARGS)
 	{
 		MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-		nhtups++;
+		nhtups += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -131,7 +131,7 @@ hashbuild(PG_FUNCTION_ARGS)
 			slot->val = htup;
 			if (ExecQual((List *) oldPred, econtext, false))
 			{
-				nitups++;
+				nitups += 1.0;
 				continue;
 			}
 		}
@@ -148,7 +148,7 @@ hashbuild(PG_FUNCTION_ARGS)
 		}
 #endif	 /* OMIT_PARTIAL_INDEX */
 
-		nitups++;
+		nitups += 1.0;
 
 		/*
 		 * For the current heap tuple, extract all the attributes we use
diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index fb509ab66de..2a9df577b10 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.21 2001/03/25 00:45:20 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.22 2001/05/07 00:43:15 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -167,6 +167,43 @@ heap_tuple_untoast_attr(varattrib *attr)
 
 
 /* ----------
+ * toast_raw_datum_size -
+ *	Size is taken from the datum's own size fields; nothing is detoasted here.
+ *	Return the raw (detoasted) size of a varlena datum
+ * ----------
+ */
+Size
+toast_raw_datum_size(Datum value)
+{
+	varattrib  *attr = (varattrib *) DatumGetPointer(value);
+	Size		result;
+
+	if (VARATT_IS_COMPRESSED(attr))
+	{
+		/*
+		 * va_rawsize shows the original data size, whether the datum is
+		 * external or not; VARHDRSZ is added to match the cases below.
+		 */
+		result = attr->va_content.va_compressed.va_rawsize + VARHDRSZ;
+	}
+	else if (VARATT_IS_EXTERNAL(attr))
+	{
+		/*
+		 * an uncompressed external attribute has rawsize including the
+		 * header (not too consistent!)
+		 */
+		result = attr->va_content.va_external.va_rawsize;
+	}
+	else
+	{
+		/* plain untoasted datum: its own VARSIZE is the answer */
+		result = VARSIZE(attr);
+	}
+	return result;
+}
+
+
+/* ----------
  * toast_delete -
  *
  *	Cascaded delete toast-entries on DELETE
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 97d99da4fde..f456e0c9306 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -12,7 +12,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.79 2001/03/22 03:59:15 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.80 2001/05/07 00:43:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -69,7 +69,7 @@ btbuild(PG_FUNCTION_ARGS)
 				itupdesc;
 	Datum		attdata[INDEX_MAX_KEYS];
 	char		nulls[INDEX_MAX_KEYS];
-	int			nhtups,
+	double		nhtups,
 				nitups;
 	Node	   *pred = indexInfo->ii_Predicate;
 
@@ -156,7 +156,7 @@ btbuild(PG_FUNCTION_ARGS)
 #endif	 /* OMIT_PARTIAL_INDEX */
 
 	/* build the index */
-	nhtups = nitups = 0;
+	nhtups = nitups = 0.0;
 
 	if (usefast)
 	{
@@ -196,7 +196,7 @@ btbuild(PG_FUNCTION_ARGS)
 
 		MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-		nhtups++;
+		nhtups += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -209,7 +209,7 @@ btbuild(PG_FUNCTION_ARGS)
 			slot->val = htup;
 			if (ExecQual((List *) oldPred, econtext, false))
 			{
-				nitups++;
+				nitups += 1.0;
 				continue;
 			}
 		}
@@ -226,7 +226,7 @@ btbuild(PG_FUNCTION_ARGS)
 		}
 #endif	 /* OMIT_PARTIAL_INDEX */
 
-		nitups++;
+		nitups += 1.0;
 
 		/*
 		 * For the current heap tuple, extract all the attributes we use
diff --git a/src/backend/access/rtree/rtree.c b/src/backend/access/rtree/rtree.c
index 3752a59e99a..a8c6a13ea3c 100644
--- a/src/backend/access/rtree/rtree.c
+++ b/src/backend/access/rtree/rtree.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.61 2001/03/22 03:59:16 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.62 2001/05/07 00:43:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -100,7 +100,7 @@ rtbuild(PG_FUNCTION_ARGS)
 				itupdesc;
 	Datum		attdata[INDEX_MAX_KEYS];
 	char		nulls[INDEX_MAX_KEYS];
-	int			nhtups,
+	double		nhtups,
 				nitups;
 	Node	   *pred = indexInfo->ii_Predicate;
 
@@ -163,7 +163,7 @@ rtbuild(PG_FUNCTION_ARGS)
 #endif	 /* OMIT_PARTIAL_INDEX */
 
 	/* count the tuples as we insert them */
-	nhtups = nitups = 0;
+	nhtups = nitups = 0.0;
 
 	/* start a heap scan */
 	hscan = heap_beginscan(heap, 0, SnapshotNow, 0, (ScanKey) NULL);
@@ -172,7 +172,7 @@ rtbuild(PG_FUNCTION_ARGS)
 	{
 		MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-		nhtups++;
+		nhtups += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -185,7 +185,7 @@ rtbuild(PG_FUNCTION_ARGS)
 			slot->val = htup;
 			if (ExecQual((List *) oldPred, econtext, false))
 			{
-				nitups++;
+				nitups += 1.0;
 				continue;
 			}
 		}
@@ -202,7 +202,7 @@ rtbuild(PG_FUNCTION_ARGS)
 		}
 #endif	 /* OMIT_PARTIAL_INDEX */
 
-		nitups++;
+		nitups += 1.0;
 
 		/*
 		 * For the current heap tuple, extract all the attributes we use
diff --git a/src/backend/catalog/genbki.sh b/src/backend/catalog/genbki.sh
index c2993fa8fc6..cac53f3e085 100644
--- a/src/backend/catalog/genbki.sh
+++ b/src/backend/catalog/genbki.sh
@@ -10,7 +10,7 @@
 #
 #
 # IDENTIFICATION
-#    $Header: /cvsroot/pgsql/src/backend/catalog/Attic/genbki.sh,v 1.19 2001/01/16 22:48:34 tgl Exp $
+#    $Header: /cvsroot/pgsql/src/backend/catalog/Attic/genbki.sh,v 1.20 2001/05/07 00:43:16 tgl Exp $
 #
 # NOTES
 #    non-essential whitespace is removed from the generated file.
@@ -126,10 +126,12 @@ for dir in $INCLUDE_DIRS; do
     fi
 done
 
-# Get INDEX_MAX_KEYS from config.h (who needs consistency?)
+# Get INDEX_MAX_KEYS and DEFAULT_ATTSTATTARGET from config.h
+# (who needs consistency?)
 for dir in $INCLUDE_DIRS; do
     if [ -f "$dir/config.h" ]; then
         INDEXMAXKEYS=`grep '#define[ 	]*INDEX_MAX_KEYS' $dir/config.h | $AWK '{ print $3 }'`
+        DEFAULTATTSTATTARGET=`grep '#define[ 	]*DEFAULT_ATTSTATTARGET' $dir/config.h | $AWK '{ print $3 }'`
         break
     fi
 done
@@ -168,6 +170,7 @@ sed -e "s/;[ 	]*$//g" \
     -e "s/(NameData/(name/g" \
     -e "s/(Oid/(oid/g" \
     -e "s/NAMEDATALEN/$NAMEDATALEN/g" \
+    -e "s/DEFAULT_ATTSTATTARGET/$DEFAULTATTSTATTARGET/g" \
     -e "s/INDEX_MAX_KEYS\*2/$INDEXMAXKEYS2/g" \
     -e "s/INDEX_MAX_KEYS\*4/$INDEXMAXKEYS4/g" \
     -e "s/INDEX_MAX_KEYS/$INDEXMAXKEYS/g" \
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 54867d51a4b..03f16e11c3f 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.162 2001/03/22 06:16:10 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.163 2001/05/07 00:43:17 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -96,54 +96,72 @@ static void RemoveStatistics(Relation rel);
 
 /*
  * Note:
- *		Should the executor special case these attributes in the future?
- *		Advantage:	consume 1/2 the space in the ATTRIBUTE relation.
- *		Disadvantage:  having rules to compute values in these tuples may
- *				be more difficult if not impossible.
+ *		Should the system special case these attributes in the future?
+ *		Advantage:	consume much less space in the ATTRIBUTE relation.
+ *		Disadvantage:  special cases will be all over the place.
  */
 
 static FormData_pg_attribute a1 = {
-	0xffffffff, {"ctid"}, TIDOID, 0, sizeof(ItemPointerData),
-	SelfItemPointerAttributeNumber, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0'
+	0, {"ctid"}, TIDOID, 0, sizeof(ItemPointerData),
+	SelfItemPointerAttributeNumber, 0, -1, -1,
+	false, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a2 = {
-	0xffffffff, {"oid"}, OIDOID, 0, sizeof(Oid),
-	ObjectIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+	0, {"oid"}, OIDOID, 0, sizeof(Oid),
+	ObjectIdAttributeNumber, 0, -1, -1,
+	true, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a3 = {
-	0xffffffff, {"xmin"}, XIDOID, 0, sizeof(TransactionId),
-	MinTransactionIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+	0, {"xmin"}, XIDOID, 0, sizeof(TransactionId),
+	MinTransactionIdAttributeNumber, 0, -1, -1,
+	true, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a4 = {
-	0xffffffff, {"cmin"}, CIDOID, 0, sizeof(CommandId),
-	MinCommandIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+	0, {"cmin"}, CIDOID, 0, sizeof(CommandId),
+	MinCommandIdAttributeNumber, 0, -1, -1,
+	true, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a5 = {
-	0xffffffff, {"xmax"}, XIDOID, 0, sizeof(TransactionId),
-	MaxTransactionIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+	0, {"xmax"}, XIDOID, 0, sizeof(TransactionId),
+	MaxTransactionIdAttributeNumber, 0, -1, -1,
+	true, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a6 = {
-	0xffffffff, {"cmax"}, CIDOID, 0, sizeof(CommandId),
-	MaxCommandIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+	0, {"cmax"}, CIDOID, 0, sizeof(CommandId),
+	MaxCommandIdAttributeNumber, 0, -1, -1,
+	true, 'p', false, 'i', false, false
 };
 
 /*
-   We decide to call this attribute "tableoid" rather than say
-"classoid" on the basis that in the future there may be more than one
-table of a particular class/type. In any case table is still the word
-used in SQL.
-*/
+ * We decided to call this attribute "tableoid" rather than say
+ * "classoid" on the basis that in the future there may be more than one
+ * table of a particular class/type. In any case table is still the word
+ * used in SQL.
+ */
 static FormData_pg_attribute a7 = {
-	0xffffffff, {"tableoid"}, OIDOID, 0, sizeof(Oid),
-	TableOidAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+	0, {"tableoid"}, OIDOID, 0, sizeof(Oid),
+	TableOidAttributeNumber, 0, -1, -1,
+	true, 'p', false, 'i', false, false
 };
 
-static Form_pg_attribute HeapAtt[] = {&a1, &a2, &a3, &a4, &a5, &a6, &a7};
+static Form_pg_attribute SysAtt[] = {&a1, &a2, &a3, &a4, &a5, &a6, &a7};
+
+/*
+ * Look up a system attribute in SysAtt[]; elog(ERROR) if attno is out of range.
+ */
+Form_pg_attribute
+SystemAttributeDefinition(AttrNumber attno)
+{
+	if (attno >= 0 || attno < - (int) lengthof(SysAtt))
+		elog(ERROR, "SystemAttributeDefinition: invalid attribute number %d",
+			 attno);
+	return SysAtt[-attno - 1];	/* attno -1 is SysAtt[0], -2 is SysAtt[1], ... */
+}
 
 /* ----------------------------------------------------------------
  *				XXX END OF UGLY HARD CODED BADNESS XXX
@@ -380,32 +398,6 @@ heap_storage_create(Relation rel)
  *		8) the relations are closed and the new relation's oid
  *		   is returned.
  *
- * old comments:
- *		A new relation is inserted into the RELATION relation
- *		with the specified attribute(s) (newly inserted into
- *		the ATTRIBUTE relation).  How does concurrency control
- *		work?  Is it automatic now?  Expects the caller to have
- *		attname, atttypid, atttyparg, attproc, and attlen domains filled.
- *		Create fills the attnum domains sequentually from zero,
- *		fills the attdispersion domains with zeros, and fills the
- *		attrelid fields with the relid.
- *
- *		scan relation catalog for name conflict
- *		scan type catalog for typids (if not arg)
- *		create and insert attribute(s) into attribute catalog
- *		create new relation
- *		insert new relation into attribute catalog
- *
- *		Should coordinate with heap_create_with_catalog(). Either
- *		it should not be called or there should be a way to prevent
- *		the relation from being removed at the end of the
- *		transaction if it is successful ('u'/'r' may be enough).
- *		Also, if the transaction does not commit, then the
- *		relation should be removed.
- *
- *		XXX amcreate ignores "off" when inserting (for now).
- *		XXX amcreate (like the other utilities) needs to understand indexes.
- *
  * ----------------------------------------------------------------
  */
 
@@ -432,14 +424,14 @@ CheckAttributeNames(TupleDesc tupdesc)
 	 */
 	for (i = 0; i < natts; i++)
 	{
-		for (j = 0; j < (int) (sizeof(HeapAtt) / sizeof(HeapAtt[0])); j++)
+		for (j = 0; j < (int) lengthof(SysAtt); j++)
 		{
-			if (strcmp(NameStr(HeapAtt[j]->attname),
+			if (strcmp(NameStr(SysAtt[j]->attname),
 					   NameStr(tupdesc->attrs[i]->attname)) == 0)
 			{
 				elog(ERROR, "Attribute '%s' has a name conflict"
 					 "\n\tName matches an existing system attribute",
-					 NameStr(HeapAtt[j]->attname));
+					 NameStr(SysAtt[j]->attname));
 			}
 		}
 		if (tupdesc->attrs[i]->atttypid == UNKNOWNOID)
@@ -574,7 +566,7 @@ AddNewAttributeTuples(Oid new_rel_oid,
 		/* Fill in the correct relation OID */
 		(*dpp)->attrelid = new_rel_oid;
 		/* Make sure these are OK, too */
-		(*dpp)->attdispersion = 0;
+		(*dpp)->attstattarget = DEFAULT_ATTSTATTARGET;
 		(*dpp)->attcacheoff = -1;
 
 		tup = heap_addheader(Natts_pg_attribute,
@@ -593,14 +585,14 @@ AddNewAttributeTuples(Oid new_rel_oid,
 	/*
 	 * next we add the system attributes..
 	 */
-	dpp = HeapAtt;
+	dpp = SysAtt;
 	for (i = 0; i < -1 - FirstLowInvalidHeapAttributeNumber; i++)
 	{
 		/* Fill in the correct relation OID */
 		/* HACK: we are writing on static data here */
 		(*dpp)->attrelid = new_rel_oid;
 		/* Unneeded since they should be OK in the constant data anyway */
-		/* (*dpp)->attdispersion = 0; */
+		/* (*dpp)->attstattarget = 0; */
 		/* (*dpp)->attcacheoff = -1; */
 
 		tup = heap_addheader(Natts_pg_attribute,
@@ -669,8 +661,23 @@ AddNewRelationTuple(Relation pg_class_desc,
 	 * save. (NOTE: CREATE INDEX inserts the same bogus estimates if it
 	 * finds the relation has 0 rows and pages. See index.c.)
 	 */
-	new_rel_reltup->relpages = 10;		/* bogus estimates */
-	new_rel_reltup->reltuples = 1000;
+	switch (relkind)
+	{
+		case RELKIND_RELATION:
+		case RELKIND_INDEX:
+		case RELKIND_TOASTVALUE:
+			new_rel_reltup->relpages = 10;	/* bogus estimates */
+			new_rel_reltup->reltuples = 1000;
+			break;
+		case RELKIND_SEQUENCE:
+			new_rel_reltup->relpages = 1;
+			new_rel_reltup->reltuples = 1;
+			break;
+		default:				/* views, etc */
+			new_rel_reltup->relpages = 0;
+			new_rel_reltup->reltuples = 0;
+			break;
+	}
 
 	new_rel_reltup->relowner = GetUserId();
 	new_rel_reltup->reltype = new_type_oid;
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 2adb30e1ed8..5eefab11489 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.145 2001/04/02 14:34:25 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.146 2001/05/07 00:43:17 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -55,7 +55,7 @@
  */
 #define AVG_ATTR_SIZE 8
 #define NTUPLES_PER_PAGE(natts) \
-	((BLCKSZ - MAXALIGN(sizeof (PageHeaderData))) / \
+	((BLCKSZ - MAXALIGN(sizeof(PageHeaderData))) / \
 	((natts) * AVG_ATTR_SIZE + MAXALIGN(sizeof(HeapTupleHeaderData))))
 
 /* non-export function prototypes */
@@ -99,39 +99,6 @@ IsReindexProcessing(void)
 }
 
 /* ----------------------------------------------------------------
- *	  sysatts is a structure containing attribute tuple forms
- *	  for system attributes (numbered -1, -2, ...).  This really
- *	  should be generated or eliminated or moved elsewhere. -cim 1/19/91
- *
- * typedef struct FormData_pg_attribute {
- *		Oid				attrelid;
- *		NameData		attname;
- *		Oid				atttypid;
- *		uint32			attnvals;
- *		int16			attlen;
- *		AttrNumber		attnum;
- *		uint32			attnelems;
- *		int32			attcacheoff;
- *		int32			atttypmod;
- *		bool			attbyval;
- *		bool			attisset;
- *		char			attalign;
- *		bool			attnotnull;
- *		bool			atthasdef;
- * } FormData_pg_attribute;
- *
- * ----------------------------------------------------------------
- */
-static FormData_pg_attribute sysatts[] = {
-	{0, {"ctid"}, TIDOID, 0, 6, -1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0'},
-	{0, {"oid"}, OIDOID, 0, 4, -2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-	{0, {"xmin"}, XIDOID, 0, 4, -3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-	{0, {"cmin"}, CIDOID, 0, 4, -4, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-	{0, {"xmax"}, XIDOID, 0, 4, -5, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-	{0, {"cmax"}, CIDOID, 0, 4, -6, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-};
-
-/* ----------------------------------------------------------------
  *		GetHeapRelationOid
  * ----------------------------------------------------------------
  */
@@ -250,7 +217,6 @@ ConstructTupleDescriptor(Relation heapRelation,
 	for (i = 0; i < numatts; i++)
 	{
 		AttrNumber	atnum;		/* attributeNumber[attributeOffset] */
-		AttrNumber	atind;
 		Form_pg_attribute from;
 		Form_pg_attribute to;
 
@@ -264,16 +230,9 @@ ConstructTupleDescriptor(Relation heapRelation,
 		{
 
 			/*
-			 * here we are indexing on a system attribute (-1...-n) so we
-			 * convert atnum into a usable index 0...n-1 so we can use it
-			 * to dereference the array sysatts[] which stores tuple
-			 * descriptor information for system attributes.
+			 * here we are indexing on a system attribute (-1...-n)
 			 */
-			if (atnum <= FirstLowInvalidHeapAttributeNumber || atnum >= 0)
-				elog(ERROR, "Cannot create index on system attribute: attribute number out of range (%d)", atnum);
-			atind = (-atnum) - 1;
-
-			from = &sysatts[atind];
+			from = SystemAttributeDefinition(atnum);
 		}
 		else
 		{
@@ -284,9 +243,8 @@ ConstructTupleDescriptor(Relation heapRelation,
 			if (atnum > natts)
 				elog(ERROR, "Cannot create index: attribute %d does not exist",
 					 atnum);
-			atind = AttrNumberGetAttrOffset(atnum);
 
-			from = heapTupDesc->attrs[atind];
+			from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)];
 		}
 
 		/*
@@ -303,10 +261,10 @@ ConstructTupleDescriptor(Relation heapRelation,
 		 */
 		to->attnum = i + 1;
 
-		to->attdispersion = 0.0;
+		to->attstattarget = 0;
+		to->attcacheoff = -1;
 		to->attnotnull = false;
 		to->atthasdef = false;
-		to->attcacheoff = -1;
 
 		/*
 		 * We do not yet have the correct relation OID for the index, so
@@ -1542,10 +1500,14 @@ setNewRelfilenode(Relation relation)
 
 /* ----------------
  *		UpdateStats
+ *
+ * Update pg_class' relpages and reltuples statistics for the given relation
+ * (which can be either a table or an index).  Note that this is not used
+ * in the context of VACUUM.
  * ----------------
  */
 void
-UpdateStats(Oid relid, long reltuples)
+UpdateStats(Oid relid, double reltuples)
 {
 	Relation	whichRel;
 	Relation	pg_class;
@@ -1636,6 +1598,10 @@ UpdateStats(Oid relid, long reltuples)
 	 * with zero size statistics until a VACUUM is done.  The optimizer
 	 * will generate very bad plans if the stats claim the table is empty
 	 * when it is actually sizable.  See also CREATE TABLE in heap.c.
+	 *
+	 * Note: this path is also taken during bootstrap, because bootstrap.c
+	 * passes reltuples = 0 after loading a table.  We have to estimate some
+	 * number for reltuples based on the actual number of pages.
 	 */
 	relpages = RelationGetNumberOfBlocks(whichRel);
 
@@ -1689,15 +1655,15 @@ UpdateStats(Oid relid, long reltuples)
 
 		for (i = 0; i < Natts_pg_class; i++)
 		{
-			nulls[i] = heap_attisnull(tuple, i + 1) ? 'n' : ' ';
+			nulls[i] = ' ';
 			replace[i] = ' ';
 			values[i] = (Datum) NULL;
 		}
 
 		replace[Anum_pg_class_relpages - 1] = 'r';
-		values[Anum_pg_class_relpages - 1] = (Datum) relpages;
+		values[Anum_pg_class_relpages - 1] = Int32GetDatum(relpages);
 		replace[Anum_pg_class_reltuples - 1] = 'r';
-		values[Anum_pg_class_reltuples - 1] = (Datum) reltuples;
+		values[Anum_pg_class_reltuples - 1] = Float4GetDatum((float4) reltuples);
 		newtup = heap_modifytuple(tuple, pg_class, values, nulls, replace);
 		simple_heap_update(pg_class, &tuple->t_self, newtup);
 		if (!IsIgnoringSystemIndexes())
@@ -1741,7 +1707,7 @@ DefaultBuild(Relation heapRelation,
 	TupleDesc	heapDescriptor;
 	Datum		datum[INDEX_MAX_KEYS];
 	char		nullv[INDEX_MAX_KEYS];
-	long		reltuples,
+	double		reltuples,
 				indtuples;
 	Node	   *predicate = indexInfo->ii_Predicate;
 
@@ -1796,7 +1762,7 @@ DefaultBuild(Relation heapRelation,
 						  0,	/* number of keys */
 						  (ScanKey) NULL);		/* scan key */
 
-	reltuples = indtuples = 0;
+	reltuples = indtuples = 0.0;
 
 	/*
 	 * for each tuple in the base relation, we create an index tuple and
@@ -1808,7 +1774,7 @@ DefaultBuild(Relation heapRelation,
 	{
 		MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-		reltuples++;
+		reltuples += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -1821,7 +1787,7 @@ DefaultBuild(Relation heapRelation,
 			slot->val = heapTuple;
 			if (ExecQual((List *) oldPred, econtext, false))
 			{
-				indtuples++;
+				indtuples += 1.0;
 				continue;
 			}
 		}
@@ -1838,7 +1804,7 @@ DefaultBuild(Relation heapRelation,
 		}
 #endif	 /* OMIT_PARTIAL_INDEX */
 
-		indtuples++;
+		indtuples += 1.0;
 
 		/*
 		 * FormIndexDatum fills in its datum and null parameters with
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 88e56869da5..24cc7a8b254 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -8,19 +8,16 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.16 2001/03/22 06:16:11 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.17 2001/05/07 00:43:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
-#include <sys/types.h>
-#include <sys/file.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
+#include <math.h>
 
 #include "access/heapam.h"
+#include "access/tuptoaster.h"
 #include "catalog/catname.h"
 #include "catalog/indexing.h"
 #include "catalog/pg_operator.h"
@@ -29,43 +26,139 @@
 #include "commands/vacuum.h"
 #include "miscadmin.h"
 #include "parser/parse_oper.h"
-#include "tcop/tcopprot.h"
 #include "utils/acl.h"
 #include "utils/builtins.h"
+#include "utils/datum.h"
 #include "utils/fmgroids.h"
-#include "utils/inval.h"
 #include "utils/syscache.h"
+#include "utils/tuplesort.h"
 
-#define swapLong(a,b)	{long tmp; tmp=a; a=b; b=tmp;}
-#define swapInt(a,b)	{int tmp; tmp=a; a=b; b=tmp;}
-#define swapDatum(a,b)	{Datum tmp; tmp=a; a=b; b=tmp;}
-#define VacAttrStatsEqValid(stats) ( stats->f_cmpeq.fn_addr != NULL )
-#define VacAttrStatsLtGtValid(stats) ( stats->f_cmplt.fn_addr != NULL && \
-								   stats->f_cmpgt.fn_addr != NULL && \
-								   RegProcedureIsValid(stats->outfunc) )
 
+/*
+ * Analysis algorithms supported
+ */
+typedef enum {
+	ALG_MINIMAL = 1,			/* Compute only most-common-values */
+	ALG_SCALAR					/* Compute MCV, histogram, sort correlation */
+} AlgCode;
+
+/*
+ * To avoid consuming too much memory during analysis and/or too much space
+ * in the resulting pg_statistic rows, we ignore varlena datums that are wider
+ * than WIDTH_THRESHOLD (after detoasting!).  This is legitimate for MCV
+ * and distinct-value calculations since a wide value is unlikely to be
+ * duplicated at all, much less be a most-common value.  For the same reason,
+ * ignoring wide values will not affect our estimates of histogram bin
+ * boundaries very much.
+ */
+#define WIDTH_THRESHOLD  256
+
+/*
+ * We build one of these structs for each attribute (column) that is to be
+ * analyzed.  The struct and subsidiary data are in TransactionCommandContext,
+ * so they live until the end of the ANALYZE operation.
+ */
+typedef struct
+{
+	/* These fields are set up by examine_attribute */
+	int			attnum;			/* attribute number */
+	AlgCode		algcode;		/* Which algorithm to use for this column */
+	int			minrows;		/* Minimum # of rows needed for stats */
+	Form_pg_attribute attr;		/* copy of pg_attribute row for column */
+	Form_pg_type attrtype;		/* copy of pg_type row for column */
+	Oid			eqopr;			/* '=' operator for datatype, if any */
+	Oid			eqfunc;			/* and associated function */
+	Oid			ltopr;			/* '<' operator for datatype, if any */
+
+	/* These fields are filled in by the actual statistics-gathering routine */
+	bool		stats_valid;	/* presumably true once the fields below are set -- TODO confirm */
+	float4		stanullfrac;	/* fraction of entries that are NULL */
+	int4		stawidth;		/* average width */
+	float4		stadistinct;	/* # distinct values */
+	int2		stakind[STATISTIC_NUM_SLOTS];	/* this and all arrays below: one entry per slot */
+	Oid			staop[STATISTIC_NUM_SLOTS];
+	int			numnumbers[STATISTIC_NUM_SLOTS];	/* presumably # entries in stanumbers[i] -- confirm */
+	float4	   *stanumbers[STATISTIC_NUM_SLOTS];
+	int			numvalues[STATISTIC_NUM_SLOTS];	/* presumably # entries in stavalues[i] -- confirm */
+	Datum	   *stavalues[STATISTIC_NUM_SLOTS];
+} VacAttrStats;
+
+
+typedef struct
+{
+	Datum		value;			/* a data value */
+	int			tupno;			/* position index for tuple it came from */
+} ScalarItem;
+
+typedef struct
+{
+	int			count;			/* # of duplicates */
+	int			first;			/* values[] index of first occurrence */
+} ScalarMCVItem;
+
+/* Swap two lvalues; do/while(0) makes each use one statement (needs a ';'). */
+#define swapInt(a,b)	do {int _tmp; _tmp=(a); (a)=(b); (b)=_tmp;} while (0)
+#define swapDatum(a,b)	do {Datum _tmp; _tmp=(a); (a)=(b); (b)=_tmp;} while (0)
 
-static void attr_stats(Relation onerel, int attr_cnt, VacAttrStats *vacattrstats, HeapTuple tuple);
-static void bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len);
-static void update_attstats(Oid relid, int natts, VacAttrStats *vacattrstats);
-static void del_stats(Oid relid, int attcnt, int *attnums);
+
+static int MESSAGE_LEVEL;
+
+/* context information for compare_scalars() */
+static FmgrInfo *datumCmpFn;
+static SortFunctionKind datumCmpFnKind;
+static int *datumCmpTupnoLink;
+
+
+static VacAttrStats *examine_attribute(Relation onerel, int attnum);
+static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
+							   int targrows, long *totalrows);
+static double random_fract(void);
+static double init_selection_state(int n);
+static long select_next_random_record(long t, int n, double *stateptr);
+static int compare_rows(const void *a, const void *b);
+static int compare_scalars(const void *a, const void *b);
+static int compare_mcvs(const void *a, const void *b);
+static OffsetNumber get_page_max_offset(Relation relation,
+										BlockNumber blocknumber);
+static void compute_minimal_stats(VacAttrStats *stats,
+								  TupleDesc tupDesc, long totalrows,
+								  HeapTuple *rows, int numrows);
+static void compute_scalar_stats(VacAttrStats *stats,
+								 TupleDesc tupDesc, long totalrows,
+								 HeapTuple *rows, int numrows);
+static void update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats);
 
 
 /*
- *	analyze_rel() -- analyze relation
+ *	analyze_rel() -- analyze one relation
  */
 void
-analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
+analyze_rel(Oid relid, VacuumStmt *vacstmt)
 {
-	HeapTuple	tuple;
 	Relation	onerel;
-	int32		i;
-	int			attr_cnt,
-			   *attnums = NULL;
 	Form_pg_attribute *attr;
-	VacAttrStats *vacattrstats;
-	HeapScanDesc scan;
+	int			attr_cnt,
+				tcnt,
+				i;
+	VacAttrStats **vacattrstats;
+	int			targrows,
+				numrows;
+	long		totalrows;
+	HeapTuple  *rows;
+	HeapTuple	tuple;
+
+	if (vacstmt->verbose)
+		MESSAGE_LEVEL = NOTICE;
+	else
+		MESSAGE_LEVEL = DEBUG;
 
+	/*
+	 * Begin a transaction for analyzing this relation.
+	 *
+	 * Note: All memory allocated during ANALYZE will live in
+	 * TransactionCommandContext or a subcontext thereof, so it will
+	 * all be released by transaction commit at the end of this routine.
+	 */
 	StartTransactionCommand();
 
 	/*
@@ -76,7 +169,7 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
 
 	/*
 	 * Race condition -- if the pg_class tuple has gone away since the
-	 * last time we saw it, we don't need to vacuum it.
+	 * last time we saw it, we don't need to process it.
 	 */
 	tuple = SearchSysCache(RELOID,
 						   ObjectIdGetDatum(relid),
@@ -88,8 +181,7 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
 	}
 
 	/*
-	 * We can VACUUM ANALYZE any table except pg_statistic. see
-	 * update_relstats
+	 * We can ANALYZE any table except pg_statistic. See update_attstats
 	 */
 	if (strcmp(NameStr(((Form_pg_class) GETSTRUCT(tuple))->relname),
 			   StatisticRelationName) == 0)
@@ -100,586 +192,1466 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
 	}
 	ReleaseSysCache(tuple);
 
+	/*
+	 * Open the class, getting only a read lock on it, and check permissions
+	 */
 	onerel = heap_open(relid, AccessShareLock);
 
 	if (!pg_ownercheck(GetUserId(), RelationGetRelationName(onerel),
 					   RELNAME))
 	{
-
-		/*
-		 * we already did an elog during vacuum elog(NOTICE, "Skipping
-		 * \"%s\" --- only table owner can VACUUM it",
-		 * RelationGetRelationName(onerel));
-		 */
+		/* No need for a notice if we already complained during VACUUM */
+		if (!vacstmt->vacuum)
+			elog(NOTICE, "Skipping \"%s\" --- only table owner can ANALYZE it",
+				 RelationGetRelationName(onerel));
 		heap_close(onerel, NoLock);
 		CommitTransactionCommand();
 		return;
 	}
 
-	elog(MESSAGE_LEVEL, "Analyzing...");
+	elog(MESSAGE_LEVEL, "Analyzing %s", RelationGetRelationName(onerel));
 
-	attr_cnt = onerel->rd_att->natts;
+	/*
+	 * Determine which columns to analyze
+	 *
+	 * Note that system attributes are never analyzed.
+	 */
 	attr = onerel->rd_att->attrs;
+	attr_cnt = onerel->rd_att->natts;
 
-	if (anal_cols2 != NIL)
+	if (vacstmt->va_cols != NIL)
 	{
-		int			tcnt = 0;
 		List	   *le;
 
-		if (length(anal_cols2) > attr_cnt)
-			elog(ERROR, "vacuum: too many attributes specified for relation %s",
-				 RelationGetRelationName(onerel));
-		attnums = (int *) palloc(attr_cnt * sizeof(int));
-		foreach(le, anal_cols2)
+		vacattrstats = (VacAttrStats **) palloc(length(vacstmt->va_cols) *
+												sizeof(VacAttrStats *));
+		tcnt = 0;
+		foreach(le, vacstmt->va_cols)
 		{
-			char	   *col = (char *) lfirst(le);
+			char	   *col = strVal(lfirst(le));
 
 			for (i = 0; i < attr_cnt; i++)
 			{
 				if (namestrcmp(&(attr[i]->attname), col) == 0)
 					break;
 			}
-			if (i < attr_cnt)	/* found */
-				attnums[tcnt++] = i;
-			else
-			{
-				elog(ERROR, "vacuum: there is no attribute %s in %s",
+			if (i >= attr_cnt)
+				elog(ERROR, "ANALYZE: there is no attribute %s in %s",
 					 col, RelationGetRelationName(onerel));
-			}
+			vacattrstats[tcnt] = examine_attribute(onerel, i+1);
+			if (vacattrstats[tcnt] != NULL)
+				tcnt++;
+		}
+		attr_cnt = tcnt;
+	}
+	else
+	{
+		vacattrstats = (VacAttrStats **) palloc(attr_cnt *
+												sizeof(VacAttrStats *));
+		tcnt = 0;
+		for (i = 0; i < attr_cnt; i++)
+		{
+			vacattrstats[tcnt] = examine_attribute(onerel, i+1);
+			if (vacattrstats[tcnt] != NULL)
+				tcnt++;
 		}
 		attr_cnt = tcnt;
 	}
 
-	vacattrstats = (VacAttrStats *) palloc(attr_cnt * sizeof(VacAttrStats));
+	/*
+	 * Quit if no analyzable columns
+	 */
+	if (attr_cnt <= 0)
+	{
+		heap_close(onerel, NoLock);
+		CommitTransactionCommand();
+		return;
+	}
 
+	/*
+	 * Determine how many rows we need to sample, using the worst case
+	 * from all analyzable columns.  We use a lower bound of 100 rows
+	 * to avoid possible overflow in Vitter's algorithm.
+	 */
+	targrows = 100;
 	for (i = 0; i < attr_cnt; i++)
 	{
-		Operator	func_operator;
-		VacAttrStats *stats;
-
-		stats = &vacattrstats[i];
-		stats->attr = palloc(ATTRIBUTE_TUPLE_SIZE);
-		memcpy(stats->attr, attr[((attnums) ? attnums[i] : i)],
-			   ATTRIBUTE_TUPLE_SIZE);
-		stats->best = stats->guess1 = stats->guess2 = 0;
-		stats->max = stats->min = 0;
-		stats->best_len = stats->guess1_len = stats->guess2_len = 0;
-		stats->max_len = stats->min_len = 0;
-		stats->initialized = false;
-		stats->best_cnt = stats->guess1_cnt = stats->guess1_hits = stats->guess2_hits = 0;
-		stats->max_cnt = stats->min_cnt = stats->null_cnt = stats->nonnull_cnt = 0;
-
-		func_operator = compatible_oper("=",
-										stats->attr->atttypid,
-										stats->attr->atttypid,
-										true);
-		if (func_operator != NULL)
-		{
-			fmgr_info(oprfuncid(func_operator), &(stats->f_cmpeq));
-			ReleaseSysCache(func_operator);
-		}
-		else
-			stats->f_cmpeq.fn_addr = NULL;
+		if (targrows < vacattrstats[i]->minrows)
+			targrows = vacattrstats[i]->minrows;
+	}
+
+	/*
+	 * Acquire the sample rows
+	 */
+	rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
+	numrows = acquire_sample_rows(onerel, rows, targrows, &totalrows);
 
-		func_operator = compatible_oper("<",
-										stats->attr->atttypid,
-										stats->attr->atttypid,
-										true);
-		if (func_operator != NULL)
+	/*
+	 * If we are running a standalone ANALYZE, update pages/tuples stats
+	 * in pg_class.  We have the accurate page count from heap_beginscan,
+	 * but only an approximate number of tuples; therefore, if we are
+	 * part of VACUUM ANALYZE do *not* overwrite the accurate count already
+	 * inserted by VACUUM.
+	 */
+	if (!vacstmt->vacuum)
+		vac_update_relstats(RelationGetRelid(onerel),
+							onerel->rd_nblocks,
+							(double) totalrows,
+							RelationGetForm(onerel)->relhasindex);
+
+	/*
+	 * Compute the statistics.  Temporary results during the calculations
+	 * for each column are stored in a child context.  The calc routines
+	 * are responsible to make sure that whatever they store into the
+	 * VacAttrStats structure is allocated in TransactionCommandContext.
+	 */
+	if (numrows > 0)
+	{
+		MemoryContext col_context,
+					old_context;
+
+		col_context = AllocSetContextCreate(CurrentMemoryContext,
+											"Analyze Column",
+											ALLOCSET_DEFAULT_MINSIZE,
+											ALLOCSET_DEFAULT_INITSIZE,
+											ALLOCSET_DEFAULT_MAXSIZE);
+		old_context = MemoryContextSwitchTo(col_context);
+		for (i = 0; i < attr_cnt; i++)
 		{
-			fmgr_info(oprfuncid(func_operator), &(stats->f_cmplt));
-			stats->op_cmplt = oprid(func_operator);
-			ReleaseSysCache(func_operator);
+			switch (vacattrstats[i]->algcode)
+			{
+				case ALG_MINIMAL:
+					compute_minimal_stats(vacattrstats[i],
+										  onerel->rd_att, totalrows,
+										  rows, numrows);
+					break;
+				case ALG_SCALAR:
+					compute_scalar_stats(vacattrstats[i],
+										 onerel->rd_att, totalrows,
+										 rows, numrows);
+					break;
+			}
+			MemoryContextResetAndDeleteChildren(col_context);
 		}
-		else
+		MemoryContextSwitchTo(old_context);
+		MemoryContextDelete(col_context);
+
+		/*
+		 * Emit the completed stats rows into pg_statistic, replacing any
+		 * previous statistics for the target columns.  (If there are stats
+		 * in pg_statistic for columns we didn't process, we leave them alone.)
+		 */
+		update_attstats(relid, attr_cnt, vacattrstats);
+	}
+
+	/*
+	 * Close source relation now, but keep lock so that no one deletes it
+	 * before we commit.  (If someone did, they'd fail to clean up the
+	 * entries we made in pg_statistic.)
+	 */
+	heap_close(onerel, NoLock);
+
+	/* Commit and release working memory */
+	CommitTransactionCommand();
+}
+
+/*
+ * examine_attribute -- pre-analysis of a single column
+ *
+ * Determine whether the column is analyzable; if so, create and initialize
+ * a VacAttrStats struct for it.  If not, return NULL.  attnum counts from 1.
+ */
+static VacAttrStats *
+examine_attribute(Relation onerel, int attnum)
+{
+	Form_pg_attribute attr = onerel->rd_att->attrs[attnum-1];
+	Operator	func_operator;
+	Oid			oprrest;
+	HeapTuple	typtuple;
+	Oid			eqopr = InvalidOid;
+	Oid			eqfunc = InvalidOid;
+	Oid			ltopr = InvalidOid;
+	VacAttrStats *stats;
+
+	/* Don't analyze column if user has specified not to (target <= 0) */
+	if (attr->attstattarget <= 0)
+		return NULL;
+
+	/* If column has no usable "=" operator, we can't do much of anything */
+	func_operator = compatible_oper("=",
+									attr->atttypid,
+									attr->atttypid,
+									true);
+	if (func_operator != NULL)
+	{
+		oprrest = ((Form_pg_operator) GETSTRUCT(func_operator))->oprrest;
+		if (oprrest == F_EQSEL)	/* "=" is used only if oprrest is F_EQSEL */
 		{
-			stats->f_cmplt.fn_addr = NULL;
-			stats->op_cmplt = InvalidOid;
+			eqopr = oprid(func_operator);
+			eqfunc = oprfuncid(func_operator);
 		}
+		ReleaseSysCache(func_operator);
+	}
+	if (!OidIsValid(eqfunc))	/* no "=", or one with another oprrest */
+		return NULL;
 
-		func_operator = compatible_oper(">",
-										stats->attr->atttypid,
-										stats->attr->atttypid,
-										true);
-		if (func_operator != NULL)
+	/*
+	 * If we have "=" then we're at least able to do the minimal algorithm,
+	 * so start filling in a VacAttrStats struct.  The pg_attribute and
+	 * pg_type rows are copied into palloc'd storage owned by the struct.
+	 */
+	stats = (VacAttrStats *) palloc(sizeof(VacAttrStats));
+	MemSet(stats, 0, sizeof(VacAttrStats));
+	stats->attnum = attnum;
+	stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_TUPLE_SIZE);
+	memcpy(stats->attr, attr, ATTRIBUTE_TUPLE_SIZE);
+	typtuple = SearchSysCache(TYPEOID,
+							  ObjectIdGetDatum(attr->atttypid),
+							  0, 0, 0);
+	if (!HeapTupleIsValid(typtuple))
+		elog(ERROR, "cache lookup of type %u failed", attr->atttypid);
+	stats->attrtype = (Form_pg_type) palloc(sizeof(FormData_pg_type));
+	memcpy(stats->attrtype, GETSTRUCT(typtuple), sizeof(FormData_pg_type));
+	ReleaseSysCache(typtuple);
+	stats->eqopr = eqopr;
+	stats->eqfunc = eqfunc;
+
+	/* Is there a "<" operator with suitable semantics (F_SCALARLTSEL)? */
+	func_operator = compatible_oper("<",
+									attr->atttypid,
+									attr->atttypid,
+									true);
+	if (func_operator != NULL)
+	{
+		oprrest = ((Form_pg_operator) GETSTRUCT(func_operator))->oprrest;
+		if (oprrest == F_SCALARLTSEL)
 		{
-			fmgr_info(oprfuncid(func_operator), &(stats->f_cmpgt));
-			ReleaseSysCache(func_operator);
+			ltopr = oprid(func_operator);
 		}
-		else
-			stats->f_cmpgt.fn_addr = NULL;
+		ReleaseSysCache(func_operator);
+	}
+	stats->ltopr = ltopr;		/* stays InvalidOid if no suitable "<" */
+
+	/*
+	 * Determine the algorithm to use (this will get more complicated later)
+	 */
+	if (OidIsValid(ltopr))
+	{
+		/* Seems to be a scalar datatype */
+		stats->algcode = ALG_SCALAR;
+		/*--------------------
+		 * The following choice of minrows is based on the paper
+		 * "Random sampling for histogram construction: how much is enough?"
+		 * by Surajit Chaudhuri, Rajeev Motwani and Vivek Narasayya, in
+		 * Proceedings of ACM SIGMOD International Conference on Management
+		 * of Data, 1998, Pages 436-447.  Their Corollary 1 to Theorem 5
+		 * says that for table size n, histogram size k, maximum relative
+		 * error in bin size f, and error probability gamma, the minimum
+		 * random sample size is
+		 *		r = 4 * k * ln(2*n/gamma) / f^2
+		 * Taking f = 0.5, gamma = 0.01, n = 1 million rows, we obtain
+		 *		r = 305.82 * k
+		 * Note that because of the log function, the dependence on n is
+		 * quite weak; even at n = 1 billion, a 300*k sample gives <= 0.59
+		 * bin size error with probability 0.99.  So there's no real need to
+		 * scale for n, which is a good thing because we don't necessarily
+		 * know it at this point.
+		 *--------------------
+		 */
+		stats->minrows = 300 * attr->attstattarget;
+	}
+	else
+	{
+		/* Can't do much but the minimal stuff */
+		stats->algcode = ALG_MINIMAL;
+		/* Might as well use the same minrows as above */
+		stats->minrows = 300 * attr->attstattarget;
+	}
+
+	return stats;
+}
 
-		tuple = SearchSysCache(TYPEOID,
-							   ObjectIdGetDatum(stats->attr->atttypid),
-							   0, 0, 0);
-		if (HeapTupleIsValid(tuple))
+/*
+ * acquire_sample_rows -- acquire a random sample of rows from the table
+ *
+ * Up to targrows rows are collected (if there are fewer than that many
+ * rows in the table, all rows are collected).  When the table is larger
+ * than targrows, a truly random sample is collected: every row has an
+ * equal chance of ending up in the final sample.
+ *
+ * We also estimate the total number of rows in the table, and return that
+ * into *totalrows.
+ *
+ * The returned list of tuples is in order by physical position in the table.
+ * (We will rely on this later to derive correlation estimates.)
+ */
+static int
+acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
+					long *totalrows)
+{
+	int			numrows = 0;
+	HeapScanDesc scan;
+	HeapTuple	tuple;
+	ItemPointer	lasttuple;
+	BlockNumber	lastblock,
+				estblock;
+	OffsetNumber lastoffset;
+	int			numest;
+	double		tuplesperpage;
+	long		t;
+	double		rstate;
+
+	Assert(targrows > 1);
+	/*
+	 * Do a simple linear scan until we reach the target number of rows.
+	 */
+	scan = heap_beginscan(onerel, false, SnapshotNow, 0, NULL);
+	while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+	{
+		rows[numrows++] = heap_copytuple(tuple);
+		if (numrows >= targrows)
+			break;
+	}
+	heap_endscan(scan);
+	/*
+	 * If we ran out of tuples then we're done, no matter how few we
+	 * collected.  No sort is needed, since they're already in order.
+	 */
+	if (!HeapTupleIsValid(tuple))
+	{
+		*totalrows = numrows;
+		return numrows;
+	}
+	/*
+	 * Otherwise, start replacing tuples in the sample until we reach the
+	 * end of the relation.  This algorithm is from Jeff Vitter's paper
+	 * (see full citation below).  It works by repeatedly computing the number
+	 * of the next tuple we want to fetch, which will replace a randomly
+	 * chosen element of the reservoir (current set of tuples).  At all times
+	 * the reservoir is a true random sample of the tuples we've passed over
+	 * so far, so when we fall off the end of the relation we're done.
+	 *
+	 * A slight difficulty is that since we don't want to fetch tuples or even
+	 * pages that we skip over, it's not possible to fetch *exactly* the N'th
+	 * tuple at each step --- we don't know how many valid tuples are on
+	 * the skipped pages.  We handle this by assuming that the average number
+	 * of valid tuples/page on the pages already scanned over holds good for
+	 * the rest of the relation as well; this lets us estimate which page
+	 * the next tuple should be on and its position in the page.  Then we
+	 * fetch the first valid tuple at or after that position, being careful
+	 * not to use the same tuple twice.  This approach should still give a
+	 * good random sample, although it's not perfect.
+	 */
+	lasttuple = &(rows[numrows-1]->t_self);
+	lastblock = ItemPointerGetBlockNumber(lasttuple);
+	lastoffset = ItemPointerGetOffsetNumber(lasttuple);
+	/*
+	 * If possible, estimate tuples/page using only completely-scanned pages.
+	 */
+	for (numest = numrows; numest > 0; numest--)
+	{
+		if (ItemPointerGetBlockNumber(&(rows[numest-1]->t_self)) != lastblock)
+			break;
+	}
+	if (numest == 0)
+	{
+		numest = numrows;		/* don't have a full page? */
+		estblock = lastblock + 1;
+	}
+	else
+	{
+		estblock = lastblock;
+	}
+	tuplesperpage = (double) numest / (double) estblock;
+
+	t = numrows;				/* t is the # of records processed so far */
+	rstate = init_selection_state(targrows);
+	for (;;)
+	{
+		double			targpos;
+		BlockNumber		targblock;
+		OffsetNumber	targoffset,
+						maxoffset;
+
+		t = select_next_random_record(t, targrows, &rstate);
+		/* Map record t to an estimated page, then an offset in that page */
+		targpos = (double) t / tuplesperpage;
+		targblock = (BlockNumber) targpos;
+		targoffset = ((int) ((targpos - targblock) * tuplesperpage)) +
+			FirstOffsetNumber;	/* cast the product, not the bare fraction */
+		/* Make sure we are past the last selected record */
+		if (targblock <= lastblock)
 		{
-			stats->outfunc = ((Form_pg_type) GETSTRUCT(tuple))->typoutput;
-			stats->typelem = ((Form_pg_type) GETSTRUCT(tuple))->typelem;
-			ReleaseSysCache(tuple);
+			targblock = lastblock;
+			if (targoffset <= lastoffset)
+				targoffset = lastoffset + 1;
 		}
-		else
+		/* Loop to find first valid record at or after given position */
+	pageloop:;
+		/*
+		 * Have we fallen off the end of the relation?  (We rely on
+		 * heap_beginscan to have updated rd_nblocks.)
+		 */
+		if (targblock >= onerel->rd_nblocks)
+			break;
+		maxoffset = get_page_max_offset(onerel, targblock);
+		for (;;)
 		{
-			stats->outfunc = InvalidOid;
-			stats->typelem = InvalidOid;
+			HeapTupleData targtuple;
+			Buffer		targbuffer;
+
+			if (targoffset > maxoffset)
+			{
+				/* Fell off end of this page, try next */
+				targblock++;
+				targoffset = FirstOffsetNumber;
+				goto pageloop;
+			}
+			ItemPointerSet(&targtuple.t_self, targblock, targoffset);
+			heap_fetch(onerel, SnapshotNow, &targtuple, &targbuffer);
+			if (targtuple.t_data != NULL)
+			{
+				/*
+				 * Found a suitable tuple, so save it, replacing one old
+				 * tuple at random
+				 */
+				int		k = (int) (targrows * random_fract());
+
+				Assert(k >= 0 && k < targrows);
+				heap_freetuple(rows[k]);
+				rows[k] = heap_copytuple(&targtuple);
+				ReleaseBuffer(targbuffer);
+				lastblock = targblock;
+				lastoffset = targoffset;
+				break;
+			}
+			/* this tuple is dead, so advance to next one on same page */
+			targoffset++;
 		}
 	}
-	/* delete existing pg_statistic rows for relation */
-	del_stats(relid, ((attnums) ? attr_cnt : 0), attnums);
-
-	/* scan relation to gather statistics */
-	scan = heap_beginscan(onerel, false, SnapshotNow, 0, NULL);
 
-	while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
-		attr_stats(onerel, attr_cnt, vacattrstats, tuple);
+	/*
+	 * Now we need to sort the collected tuples by position (itempointer).
+	 */
+	qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);
 
-	heap_endscan(scan);
+	/*
+	 * Estimate total number of valid rows in relation.
+	 */
+	*totalrows = (long) (onerel->rd_nblocks * tuplesperpage + 0.5);
 
-	/* close rel, but keep lock so it doesn't go away before commit */
-	heap_close(onerel, NoLock);
+	return numrows;
+}
 
-	/* update statistics in pg_class */
-	update_attstats(relid, attr_cnt, vacattrstats);
+/* Return a random value R uniformly distributed in the open range 0 < R < 1 */
+static double
+random_fract(void)
+{
+	long	draw;
 
-	CommitTransactionCommand();
+	/* random() may yield an endpoint value; redraw until strictly inside */
+	draw = random();
+	while (draw <= 0 || draw >= MAX_RANDOM_VALUE)
+		draw = random();
+
+	return (double) draw / (double) MAX_RANDOM_VALUE;
 }
 
 /*
- *	attr_stats() -- compute column statistics used by the planner
+ * These two routines embody Algorithm Z from "Random sampling with a
+ * reservoir" by Jeffrey S. Vitter, in ACM Trans. Math. Softw. 11, 1
+ * (Mar. 1985), Pages 37-57.  While Vitter describes his algorithm in terms
+ * of the count S of records to skip before processing another record,
+ * it is convenient to work primarily with t, the index (counting from 1)
+ * of the last record processed and next record to process.  The only extra
+ * state needed between calls is W, a random state variable.
  *
- *	We compute the column min, max, null and non-null counts.
- *	Plus we attempt to find the count of the value that occurs most
- *	frequently in each column.	These figures are used to compute
- *	the selectivity of the column.
+ * init_selection_state computes the initial W value.
  *
- *	We use a three-bucket cache to get the most frequent item.
- *	The 'guess' buckets count hits.  A cache miss causes guess1
- *	to get the most hit 'guess' item in the most recent cycle, and
- *	the new item goes into guess2.	Whenever the total count of hits
- *	of a 'guess' entry is larger than 'best', 'guess' becomes 'best'.
+ * Given that we've already processed t records (t >= n),
+ * select_next_random_record determines the number of the next record to
+ * process.
+ */
+static double
+init_selection_state(int n)
+{
+	double	u = random_fract();		/* random draw with 0 < u < 1 */
+	return exp(-log(u) / n);		/* W for the first use of Algorithm Z */
+}
+
+static long
+select_next_random_record(long t, int n, double *stateptr)
+{
+	/* The magic constant here is T from Vitter's paper (threshold 22*n) */
+	if (t <= (22 * n))
+	{
+		/* Process records using Algorithm X until t is large enough */
+		double	V,
+				quot;
+
+		V = random_fract();		/* Generate V */
+		t++;
+		quot = (double) (t - n) / (double) t;
+		/* Find min S satisfying (4.1); t itself is advanced, S is implicit */
+		while (quot > V)
+		{
+			t++;
+			quot *= (double) (t - n) / (double) t;
+		}
+	}
+	else
+	{
+		/* Now apply Algorithm Z */
+		double	W = *stateptr;	/* W as left by init or the previous call */
+		long	term = t - n + 1;
+		int		S;	/* NOTE(review): int; X may exceed int range for huge t? */
+
+		for (;;)				/* repeat until one test below accepts S */
+		{
+			long	numer,
+					numer_lim,
+					denom;
+			double	U,
+					X,
+					lhs,
+					rhs,
+					y,
+					tmp;
+
+			/* Generate U and X */
+			U = random_fract();
+			X = t * (W - 1.0);
+			S = X;				/* S is tentatively set to floor(X) */
+			/* Test if U <= h(S)/cg(X) in the manner of (6.3) */
+			tmp = (double) (t + 1) / (double) term;
+			lhs = exp(log(((U * tmp * tmp) * (term + S))/(t + X))/n);
+			rhs = (((t + X)/(term + S)) * term)/t;
+			if (lhs <= rhs)
+			{
+				W = rhs/lhs;
+				break;
+			}
+			/* Test if U <= f(S)/cg(X) */
+			y = (((U * (t + 1))/term) * (t + S + 1))/(t + X);
+			if (n < S)
+			{
+				denom = t;
+				numer_lim = term + S;
+			}
+			else
+			{
+				denom = t - n + S;
+				numer_lim = t + 1;
+			}
+			for (numer = t + S; numer >= numer_lim; numer--)
+			{
+				y *= (double) numer / (double) denom;
+				denom--;
+			}
+			W = exp(- log(random_fract())/n); /* Generate W in advance */
+			if (exp(log(y)/n) <= (t + X)/t)
+				break;
+		}
+		t += S + 1;				/* skip S records, then process the next */
+		*stateptr = W;			/* hand W back for the next call */
+	}
+	return t;
+}
+
+/*
+ * qsort comparator for rows[]: orders tuples by (block number, offset number)
+ */
+static int
+compare_rows(const void *a, const void *b)
+{
+	ItemPointer	tid_a = &(*(HeapTuple *) a)->t_self;
+	ItemPointer	tid_b = &(*(HeapTuple *) b)->t_self;
+	BlockNumber	blk_a = ItemPointerGetBlockNumber(tid_a);
+	BlockNumber	blk_b = ItemPointerGetBlockNumber(tid_b);
+	OffsetNumber off_a = ItemPointerGetOffsetNumber(tid_a);
+	OffsetNumber off_b = ItemPointerGetOffsetNumber(tid_b);
+
+	/* Primary key: the block number of the tuple's item pointer */
+	if (blk_a != blk_b)
+		return (blk_a < blk_b) ? -1 : 1;
+
+	/* Secondary key: the offset number within that block */
+	if (off_a != off_b)
+		return (off_a < off_b) ? -1 : 1;
+
+	return 0;					/* identical item pointers */
+}
+
+/*
+ * Find the largest valid tuple offset number on one page of a relation
+ *
+ * (This helper arguably belongs in some other module.)
+ */
+static OffsetNumber
+get_page_max_offset(Relation relation, BlockNumber blocknumber)
+{
+	Buffer		buf = ReadBuffer(relation, blocknumber);
+	OffsetNumber result;
+
+	if (!BufferIsValid(buf))
+		elog(ERROR, "get_page_max_offset: %s relation: ReadBuffer(%ld) failed",
+			 RelationGetRelationName(relation), (long) blocknumber);
+
+	/* Read the page's max offset while holding a share lock on the buffer */
+	LockBuffer(buf, BUFFER_LOCK_SHARE);
+	result = PageGetMaxOffsetNumber(BufferGetPage(buf));
+	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+
+	ReleaseBuffer(buf);
+	return result;
+}
+
+
+/*
+ *	compute_minimal_stats() -- compute minimal column statistics
  *
- *	This method works perfectly for columns with unique values, and columns
- *	with only two unique values, plus nulls.
+ *	We use this when we can find only an "=" operator for the datatype.
  *
- *	It becomes less perfect as the number of unique values increases and
- *	their distribution in the table becomes more random.
+ *	We determine the fraction of null rows, the average width, the
+ *	most common values, and the (estimated) number of distinct values.
  *
+ *	The most common values are determined by brute force: we keep a list
+ *	of previously seen values, ordered by number of times seen, as we scan
+ *	the samples.  A newly seen value is inserted just after the last
+ *	multiply-seen value, causing the bottommost (oldest) singly-seen value
+ *	to drop off the list.  The accuracy of this method, and also its cost,
+ *	depend mainly on the length of the list we are willing to keep.
  */
 static void
-attr_stats(Relation onerel, int attr_cnt, VacAttrStats *vacattrstats, HeapTuple tuple)
+compute_minimal_stats(VacAttrStats *stats,
+					  TupleDesc tupDesc, long totalrows,
+					  HeapTuple *rows, int numrows)
 {
 	int			i;
-	TupleDesc	tupDesc = onerel->rd_att;
-
-	for (i = 0; i < attr_cnt; i++)
+	int			null_cnt = 0;
+	int			nonnull_cnt = 0;
+	int			toowide_cnt = 0;
+	double		total_width = 0;	/* accumulated only for varlena columns */
+	bool		is_varlena = (!stats->attr->attbyval &&
+							  stats->attr->attlen == -1);
+	FmgrInfo	f_cmpeq;
+	typedef struct
+	{
+		Datum	value;			/* a tracked sample value */
+		int		count;			/* number of times it has been seen */
+	} TrackItem;
+	TrackItem  *track;
+	int			track_cnt,
+				track_max;
+	int			num_mcv = stats->attr->attstattarget;
+
+	/* We track up to 2*n values for an n-element MCV list; but at least 10 */
+	track_max = 2 * num_mcv;
+	if (track_max < 10)
+		track_max = 10;
+	track = (TrackItem *) palloc(track_max * sizeof(TrackItem));
+	track_cnt = 0;
+
+	fmgr_info(stats->eqfunc, &f_cmpeq);
+
+	for (i = 0; i < numrows; i++)
 	{
-		VacAttrStats *stats = &vacattrstats[i];
-		Datum		origvalue;
+		HeapTuple	tuple = rows[i];
 		Datum		value;
 		bool		isnull;
-		bool		value_hit;
-
-		if (!VacAttrStatsEqValid(stats))
-			continue;
-
-#ifdef	_DROP_COLUMN_HACK__
-		if (COLUMN_IS_DROPPED(stats->attr))
-			continue;
-#endif	 /* _DROP_COLUMN_HACK__ */
+		bool		match;
+		int			firstcount1,
+					j;
 
-		origvalue = heap_getattr(tuple, stats->attr->attnum,
-								 tupDesc, &isnull);
+		value = heap_getattr(tuple, stats->attnum, tupDesc, &isnull);
 
+		/* Check for null/nonnull */
 		if (isnull)
 		{
-			stats->null_cnt++;
+			null_cnt++;
 			continue;
 		}
-		stats->nonnull_cnt++;
+		nonnull_cnt++;
 
 		/*
-		 * If the value is toasted, detoast it to avoid repeated
-		 * detoastings and resultant memory leakage inside the comparison
-		 * routines.
+		 * If it's a varlena field, add up widths for average width
+		 * calculation.  Note that if the value is toasted, we
+		 * use the toasted width.  We don't bother with this calculation
+		 * if it's a fixed-width type.
 		 */
-		if (!stats->attr->attbyval && stats->attr->attlen == -1)
-			value = PointerGetDatum(PG_DETOAST_DATUM(origvalue));
-		else
-			value = origvalue;
-
-		if (!stats->initialized)
+		if (is_varlena)
 		{
-			bucketcpy(stats->attr, value, &stats->best, &stats->best_len);
-			/* best_cnt gets incremented below */
-			bucketcpy(stats->attr, value, &stats->guess1, &stats->guess1_len);
-			stats->guess1_cnt = stats->guess1_hits = 1;
-			bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len);
-			stats->guess2_hits = 1;
-			if (VacAttrStatsLtGtValid(stats))
+			total_width += VARSIZE(DatumGetPointer(value));
+			/*
+			 * If the value is toasted, we want to detoast it just once to
+			 * avoid repeated detoastings and resultant excess memory usage
+			 * during the comparisons.  Also, check to see if the value is
+			 * excessively wide, and if so don't detoast at all --- just
+			 * ignore the value.
+			 */
+			if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
 			{
-				bucketcpy(stats->attr, value, &stats->max, &stats->max_len);
-				bucketcpy(stats->attr, value, &stats->min, &stats->min_len);
-				/* min_cnt, max_cnt get incremented below */
+				toowide_cnt++;
+				continue;
 			}
-			stats->initialized = true;
+			value = PointerGetDatum(PG_DETOAST_DATUM(value));
 		}
 
-		if (VacAttrStatsLtGtValid(stats))
+		/*
+		 * See if the value matches anything we're already tracking.
+		 */
+		match = false;
+		firstcount1 = track_cnt;	/* first count-1 slot; track_cnt if none */
+		for (j = 0; j < track_cnt; j++)
 		{
-			if (DatumGetBool(FunctionCall2(&stats->f_cmplt,
-										   value, stats->min)))
+			if (DatumGetBool(FunctionCall2(&f_cmpeq, value, track[j].value)))
 			{
-				bucketcpy(stats->attr, value, &stats->min, &stats->min_len);
-				stats->min_cnt = 1;
+				match = true;
+				break;
 			}
-			else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-												value, stats->min)))
-				stats->min_cnt++;
+			if (j < firstcount1 && track[j].count == 1)
+				firstcount1 = j;
+		}
 
-			if (DatumGetBool(FunctionCall2(&stats->f_cmpgt,
-										   value, stats->max)))
+		if (match)
+		{
+			/* Found a match */
+			track[j].count++;
+			/* This value may now need to "bubble up" in the track list */
+			while (j > 0 && track[j].count > track[j-1].count)
 			{
-				bucketcpy(stats->attr, value, &stats->max, &stats->max_len);
-				stats->max_cnt = 1;
+				swapDatum(track[j].value, track[j-1].value);
+				swapInt(track[j].count, track[j-1].count);
+				j--;
 			}
-			else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-												value, stats->max)))
-				stats->max_cnt++;
 		}
-
-		value_hit = true;
-		if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-									   value, stats->best)))
-			stats->best_cnt++;
-		else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-											value, stats->guess1)))
+		else
 		{
-			stats->guess1_cnt++;
-			stats->guess1_hits++;
+			/* No match.  Insert at head of count-1 list */
+			if (track_cnt < track_max)
+				track_cnt++;
+			for (j = track_cnt-1; j > firstcount1; j--)
+			{
+				track[j].value = track[j-1].value;
+				track[j].count = track[j-1].count;
+			}
+			if (firstcount1 < track_cnt)	/* else full of repeats: drop it */
+			{
+				track[firstcount1].value = value;
+				track[firstcount1].count = 1;
+			}
 		}
-		else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-											value, stats->guess2)))
-			stats->guess2_hits++;
+	}
+
+	/* We can only compute valid stats if we found some non-null values. */
+	if (nonnull_cnt > 0)
+	{
+		int		nmultiple,
+				summultiple;
+
+		stats->stats_valid = true;
+		/* Do the simple null-frac and width stats */
+		stats->stanullfrac = (double) null_cnt / (double) numrows;
+		if (is_varlena)
+			stats->stawidth = total_width / (double) nonnull_cnt;
 		else
-			value_hit = false;
+			stats->stawidth = stats->attrtype->typlen;
 
-		if (stats->guess2_hits > stats->guess1_hits)
+		/* Count the number of values we found multiple times */
+		summultiple = 0;
+		for (nmultiple = 0; nmultiple < track_cnt; nmultiple++)
 		{
-			swapDatum(stats->guess1, stats->guess2);
-			swapInt(stats->guess1_len, stats->guess2_len);
-			swapLong(stats->guess1_hits, stats->guess2_hits);
-			stats->guess1_cnt = stats->guess1_hits;
+			if (track[nmultiple].count == 1)
+				break;
+			summultiple += track[nmultiple].count;
 		}
-		if (stats->guess1_cnt > stats->best_cnt)
+
+		if (nmultiple == 0)
 		{
-			swapDatum(stats->best, stats->guess1);
-			swapInt(stats->best_len, stats->guess1_len);
-			swapLong(stats->best_cnt, stats->guess1_cnt);
-			stats->guess1_hits = 1;
-			stats->guess2_hits = 1;
+			/* If we found no repeated values, assume it's a unique column */
+			stats->stadistinct = -1.0;
 		}
-		if (!value_hit)
+		else if (track_cnt < track_max && toowide_cnt == 0 &&
+				 nmultiple == track_cnt)
 		{
-			bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len);
-			stats->guess1_hits = 1;
-			stats->guess2_hits = 1;
+			/*
+			 * Our track list includes every value in the sample, and every
+			 * value appeared more than once.  Assume the column has just
+			 * these values.
+			 */
+			stats->stadistinct = track_cnt;
 		}
+		else
+		{
+			/*----------
+			 * Estimate the number of distinct values using the estimator
+			 * proposed by Chaudhuri et al (see citation above).  This is
+			 *		sqrt(n/r) * max(f1,1) + f2 + f3 + ...
+			 * where fk is the number of distinct values that occurred
+			 * exactly k times in our sample of r rows (from a total of n).
+			 * We assume (not very reliably!) that all the multiply-occurring
+			 * values are reflected in the final track[] list, and the other
+			 * nonnull values all appeared but once.
+			 *----------
+			 */
+			int		f1 = nonnull_cnt - summultiple;
+			double	term1;
 
-		/* Clean up detoasted copy, if any */
-		if (value != origvalue)
-			pfree(DatumGetPointer(value));
-	}
-}
+			if (f1 < 1)
+				f1 = 1;
+			term1 = sqrt((double) totalrows / (double) numrows) * f1;
+			stats->stadistinct = floor(term1 + nmultiple + 0.5);
+		}
 
-/*
- *	bucketcpy() -- copy a new value into one of the statistics buckets
- */
-static void
-bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len)
-{
-	if (attr->attbyval)
-		*bucket = value;
-	else
-	{
-		int			len = (attr->attlen != -1 ? attr->attlen : VARSIZE(value));
+		/*
+		 * If we estimated the number of distinct values at more than 10%
+		 * of the total row count (a very arbitrary limit), then assume
+		 * that stadistinct should scale with the row count rather than be
+		 * a fixed value.  The scaled form is stored as a negative fraction.
+		 */
+		if (stats->stadistinct > 0.1 * totalrows)
+			stats->stadistinct = - (stats->stadistinct / totalrows);
 
-		/* Avoid unnecessary palloc() traffic... */
-		if (len > *bucket_len)
+		/* Generate an MCV slot entry, only if we found multiples */
+		if (nmultiple < num_mcv)
+			num_mcv = nmultiple;
+		if (num_mcv > 0)
 		{
-			if (*bucket_len != 0)
-				pfree(DatumGetPointer(*bucket));
-			*bucket = PointerGetDatum(palloc(len));
-			*bucket_len = len;
+			MemoryContext old_context;
+			Datum  *mcv_values;
+			float4 *mcv_freqs;
+
+			/* Must copy the target values into TransactionCommandContext */
+			old_context = MemoryContextSwitchTo(TransactionCommandContext);
+			mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
+			mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
+			for (i = 0; i < num_mcv; i++)
+			{
+				mcv_values[i] = datumCopy(track[i].value,
+										  stats->attr->attbyval,
+										  stats->attr->attlen);
+				mcv_freqs[i] = (double) track[i].count / (double) numrows;
+			}
+			MemoryContextSwitchTo(old_context);
+
+			stats->stakind[0] = STATISTIC_KIND_MCV;
+			stats->staop[0] = stats->eqopr;
+			stats->stanumbers[0] = mcv_freqs;
+			stats->numnumbers[0] = num_mcv;
+			stats->stavalues[0] = mcv_values;
+			stats->numvalues[0] = num_mcv;
 		}
-		memcpy(DatumGetPointer(*bucket), DatumGetPointer(value), len);
 	}
+
+	/* We don't need to bother cleaning up any of our temporary palloc's */
 }
 
 
 /*
- *	update_attstats() -- update attribute statistics for one relation
+ *	compute_scalar_stats() -- compute column statistics
  *
- *		Statistics are stored in several places: the pg_class row for the
- *		relation has stats about the whole relation, the pg_attribute rows
- *		for each attribute store "dispersion", and there is a pg_statistic
- *		row for each (non-system) attribute.  (Dispersion probably ought to
- *		be moved to pg_statistic, but it's not worth doing unless there's
- *		another reason to have to change pg_attribute.)  The pg_class values
- *		are updated by VACUUM, not here.
- *
- *		We violate no-overwrite semantics here by storing new values for
- *		the dispersion column directly into the pg_attribute tuple that's
- *		already on the page.  The reason for this is that if we updated
- *		these tuples in the usual way, vacuuming pg_attribute itself
- *		wouldn't work very well --- by the time we got done with a vacuum
- *		cycle, most of the tuples in pg_attribute would've been obsoleted.
- *		Updating pg_attribute's own statistics would be especially tricky.
- *		Of course, this only works for fixed-size never-null columns, but
- *		dispersion is.
+ *	We use this when we can find "=" and "<" operators for the datatype.
  *
- *		pg_statistic rows are just added normally.	This means that
- *		pg_statistic will probably contain some deleted rows at the
- *		completion of a vacuum cycle, unless it happens to get vacuumed last.
+ *	We determine the fraction of non-null rows, the average width, the
+ *	most common values, the (estimated) number of distinct values, the
+ *	distribution histogram, and the correlation of physical to logical order.
  *
- *		To keep things simple, we punt for pg_statistic, and don't try
- *		to compute or store rows for pg_statistic itself in pg_statistic.
- *		This could possibly be made to work, but it's not worth the trouble.
+ *	The desired stats can be determined fairly easily after sorting the
+ *	data values into order.
  */
 static void
-update_attstats(Oid relid, int natts, VacAttrStats *vacattrstats)
+compute_scalar_stats(VacAttrStats *stats,
+					 TupleDesc tupDesc, long totalrows,
+					 HeapTuple *rows, int numrows)
 {
-	Relation	ad,
-				sd;
-	HeapScanDesc scan;
-	HeapTuple	atup,
-				stup;
-	ScanKeyData askey;
-	Form_pg_attribute attp;
-
-	ad = heap_openr(AttributeRelationName, RowExclusiveLock);
-	sd = heap_openr(StatisticRelationName, RowExclusiveLock);
-
-	/* Find pg_attribute rows for this relation */
-	ScanKeyEntryInitialize(&askey, 0, Anum_pg_attribute_attrelid,
-						   F_INT4EQ, relid);
-
-	scan = heap_beginscan(ad, false, SnapshotNow, 1, &askey);
-
-	while (HeapTupleIsValid(atup = heap_getnext(scan, 0)))
+	int			i;
+	int			null_cnt = 0;
+	int			nonnull_cnt = 0;
+	int			toowide_cnt = 0;
+	double		total_width = 0;
+	bool		is_varlena = (!stats->attr->attbyval &&
+							  stats->attr->attlen == -1);
+	double		corr_xysum;
+	RegProcedure cmpFn;
+	SortFunctionKind cmpFnKind;
+	FmgrInfo	f_cmpfn;
+	ScalarItem *values;
+	int			values_cnt = 0;
+	int		   *tupnoLink;
+	ScalarMCVItem *track;
+	int			track_cnt = 0;
+	int			num_mcv = stats->attr->attstattarget;
+
+	values = (ScalarItem *) palloc(numrows * sizeof(ScalarItem));
+	tupnoLink = (int *) palloc(numrows * sizeof(int));
+	track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem));
+
+	SelectSortFunction(stats->ltopr, &cmpFn, &cmpFnKind);
+	fmgr_info(cmpFn, &f_cmpfn);
+
+	/* Initial scan to find sortable values */
+	for (i = 0; i < numrows; i++)
 	{
-		int			i;
-		VacAttrStats *stats;
+		HeapTuple	tuple = rows[i];
+		Datum		value;
+		bool		isnull;
 
-		attp = (Form_pg_attribute) GETSTRUCT(atup);
-		if (attp->attnum <= 0)	/* skip system attributes for now */
-			continue;
+		value = heap_getattr(tuple, stats->attnum, tupDesc, &isnull);
 
-		for (i = 0; i < natts; i++)
+		/* Check for null/nonnull */
+		if (isnull)
 		{
-			if (attp->attnum == vacattrstats[i].attr->attnum)
-				break;
+			null_cnt++;
+			continue;
 		}
-		if (i >= natts)
-			continue;			/* skip attr if no stats collected */
-		stats = &(vacattrstats[i]);
+		nonnull_cnt++;
 
-		if (VacAttrStatsEqValid(stats))
+		/*
+		 * If it's a varlena field, add up widths for average width
+		 * calculation.  Note that if the value is toasted, we
+		 * use the toasted width.  We don't bother with this calculation
+		 * if it's a fixed-width type.
+		 */
+		if (is_varlena)
 		{
-			float4		selratio;		/* average ratio of rows selected
-										 * for a random constant */
-
-			/* Compute dispersion */
-			if (stats->nonnull_cnt == 0 && stats->null_cnt == 0)
+			total_width += VARSIZE(DatumGetPointer(value));
+			/*
+			 * If the value is toasted, we want to detoast it just once to
+			 * avoid repeated detoastings and resultant excess memory usage
+			 * during the comparisons.  Also, check to see if the value is
+			 * excessively wide, and if so don't detoast at all --- just
+			 * ignore the value.
+			 */
+			if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
 			{
-
-				/*
-				 * empty relation, so put a dummy value in attdispersion
-				 */
-				selratio = 0;
+				toowide_cnt++;
+				continue;
 			}
-			else if (stats->null_cnt <= 1 && stats->best_cnt == 1)
-			{
+			value = PointerGetDatum(PG_DETOAST_DATUM(value));
+		}
 
-				/*
-				 * looks like we have a unique-key attribute --- flag this
-				 * with special -1.0 flag value.
-				 *
-				 * The correct dispersion is 1.0/numberOfRows, but since the
-				 * relation row count can get updated without recomputing
-				 * dispersion, we want to store a "symbolic" value and
-				 * figure 1.0/numberOfRows on the fly.
-				 */
-				selratio = -1;
-			}
-			else
+		/* Add it to the list to be sorted */
+		values[values_cnt].value = value;
+		values[values_cnt].tupno = values_cnt;
+		tupnoLink[values_cnt] = values_cnt;
+		values_cnt++;
+	}
+
+	/* We can only compute valid stats if we found some sortable values. */
+	if (values_cnt > 0)
+	{
+		int		ndistinct,		/* # distinct values in sample */
+				nmultiple,		/* # that appear multiple times */
+				num_hist,
+				dups_cnt;
+		int		slot_idx = 0;
+
+		/* Sort the collected values */
+		datumCmpFn = &f_cmpfn;
+		datumCmpFnKind = cmpFnKind;
+		datumCmpTupnoLink = tupnoLink;
+		qsort((void *) values, values_cnt,
+			  sizeof(ScalarItem), compare_scalars);
+
+		/*
+		 * Now scan the values in order, find the most common ones,
+		 * and also accumulate ordering-correlation statistics.
+		 *
+		 * To determine which are most common, we first have to count the
+		 * number of duplicates of each value.  The duplicates are adjacent
+		 * in the sorted list, so a brute-force approach is to compare
+		 * successive datum values until we find two that are not equal.
+		 * However, that requires N-1 invocations of the datum comparison
+		 * routine, which are completely redundant with work that was done
+		 * during the sort.  (The sort algorithm must at some point have
+		 * compared each pair of items that are adjacent in the sorted order;
+		 * otherwise it could not know that it's ordered the pair correctly.)
+		 * We exploit this by having compare_scalars remember the highest
+		 * tupno index that each ScalarItem has been found equal to.  At the
+		 * end of the sort, a ScalarItem's tupnoLink will still point to
+		 * itself if and only if it is the last item of its group of
+		 * duplicates (since the group will be ordered by tupno).
+		 */
+		corr_xysum = 0;
+		ndistinct = 0;
+		nmultiple = 0;
+		dups_cnt = 0;
+		for (i = 0; i < values_cnt; i++)
+		{
+			int			tupno = values[i].tupno;
+
+			corr_xysum += (double) i * (double) tupno;
+			dups_cnt++;
+			if (tupnoLink[tupno] == tupno)
 			{
-				if (VacAttrStatsLtGtValid(stats) &&
-					stats->min_cnt + stats->max_cnt == stats->nonnull_cnt)
+				/* Reached end of duplicates of this value */
+				ndistinct++;
+				if (dups_cnt > 1)
 				{
+					nmultiple++;
+					if (track_cnt < num_mcv ||
+						dups_cnt > track[track_cnt-1].count)
+					{
+						/*
+						 * Found a new item for the mcv list; find its
+						 * position, bubbling down old items if needed.
+						 * Loop invariant is that j points at an empty
+						 * or replaceable slot.
+						 */
+						int		j;
+
+						if (track_cnt < num_mcv)
+							track_cnt++;
+						for (j = track_cnt-1; j > 0; j--)
+						{
+							if (dups_cnt <= track[j-1].count)
+								break;
+							track[j].count = track[j-1].count;
+							track[j].first = track[j-1].first;
+						}
+						track[j].count = dups_cnt;
+						track[j].first = i + 1 - dups_cnt;
+					}
+				}
+				dups_cnt = 0;
+			}
+		}
 
-					/*
-					 * exact result when there are just 1 or 2 values...
-					 */
-					double		min_cnt_d = stats->min_cnt,
-								max_cnt_d = stats->max_cnt,
-								null_cnt_d = stats->null_cnt;
-					double		total = ((double) stats->nonnull_cnt) + null_cnt_d;
+		stats->stats_valid = true;
+		/* Do the simple null-frac and width stats */
+		stats->stanullfrac = (double) null_cnt / (double) numrows;
+		if (is_varlena)
+			stats->stawidth = total_width / (double) nonnull_cnt;
+		else
+			stats->stawidth = stats->attrtype->typlen;
 
-					selratio = (min_cnt_d * min_cnt_d + max_cnt_d * max_cnt_d + null_cnt_d * null_cnt_d) / (total * total);
-				}
-				else
-				{
-					double		most = (double) (stats->best_cnt > stats->null_cnt ? stats->best_cnt : stats->null_cnt);
-					double		total = ((double) stats->nonnull_cnt) + ((double) stats->null_cnt);
+		if (nmultiple == 0)
+		{
+			/* If we found no repeated values, assume it's a unique column */
+			stats->stadistinct = -1.0;
+		}
+		else if (toowide_cnt == 0 && nmultiple == ndistinct)
+		{
+			/*
+			 * Every value in the sample appeared more than once.  Assume the
+			 * column has just these values.
+			 */
+			stats->stadistinct = ndistinct;
+		}
+		else
+		{
+			/*----------
+			 * Estimate the number of distinct values using the estimator
+			 * proposed by Chaudhuri et al (see citation above).  This is
+			 *		sqrt(n/r) * max(f1,1) + f2 + f3 + ...
+			 * where fk is the number of distinct values that occurred
+			 * exactly k times in our sample of r rows (from a total of n).
+			 * Overwidth values are assumed to have been distinct.
+			 *----------
+			 */
+			int		f1 = ndistinct - nmultiple + toowide_cnt;
+			double	term1;
 
-					/*
-					 * we assume count of other values are 20% of best
-					 * count in table
-					 */
-					selratio = (most * most + 0.20 * most * (total - most)) / (total * total);
-				}
-				/* Make sure calculated values are in-range */
-				if (selratio < 0.0)
-					selratio = 0.0;
-				else if (selratio > 1.0)
-					selratio = 1.0;
+			if (f1 < 1)
+				f1 = 1;
+			term1 = sqrt((double) totalrows / (double) numrows) * f1;
+			stats->stadistinct = floor(term1 + nmultiple + 0.5);
+		}
+
+		/*
+		 * If we estimated the number of distinct values at more than 10%
+		 * of the total row count (a very arbitrary limit), then assume
+		 * that stadistinct should scale with the row count rather than be
+		 * a fixed value.
+		 */
+		if (stats->stadistinct > 0.1 * totalrows)
+			stats->stadistinct = - (stats->stadistinct / totalrows);
+
+		/* Generate an MCV slot entry, only if we found multiples */
+		if (nmultiple < num_mcv)
+			num_mcv = nmultiple;
+		Assert(track_cnt >= num_mcv);
+		if (num_mcv > 0)
+		{
+			MemoryContext old_context;
+			Datum  *mcv_values;
+			float4 *mcv_freqs;
+
+			/* Must copy the target values into TransactionCommandContext */
+			old_context = MemoryContextSwitchTo(TransactionCommandContext);
+			mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
+			mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
+			for (i = 0; i < num_mcv; i++)
+			{
+				mcv_values[i] = datumCopy(values[track[i].first].value,
+										  stats->attr->attbyval,
+										  stats->attr->attlen);
+				mcv_freqs[i] = (double) track[i].count / (double) numrows;
 			}
+			MemoryContextSwitchTo(old_context);
+
+			stats->stakind[slot_idx] = STATISTIC_KIND_MCV;
+			stats->staop[slot_idx] = stats->eqopr;
+			stats->stanumbers[slot_idx] = mcv_freqs;
+			stats->numnumbers[slot_idx] = num_mcv;
+			stats->stavalues[slot_idx] = mcv_values;
+			stats->numvalues[slot_idx] = num_mcv;
+			slot_idx++;
+		}
 
-			/* overwrite the existing statistics in the tuple */
-			attp->attdispersion = selratio;
+		/*
+		 * Generate a histogram slot entry if there are at least two
+		 * distinct values not accounted for in the MCV list.  (This
+		 * ensures the histogram won't collapse to empty or a singleton.)
+		 */
+		num_hist = ndistinct - num_mcv;
+		if (num_hist > stats->attr->attstattarget)
+			num_hist = stats->attr->attstattarget + 1;
+		if (num_hist >= 2)
+		{
+			MemoryContext old_context;
+			Datum  *hist_values;
+			int		nvals;
 
-			/* invalidate the tuple in the cache and write the buffer */
-			RelationInvalidateHeapTuple(ad, atup);
-			WriteNoReleaseBuffer(scan->rs_cbuf);
+			/* Sort the MCV items into position order to speed next loop */
+			qsort((void *) track, num_mcv,
+				  sizeof(ScalarMCVItem), compare_mcvs);
 
 			/*
-			 * Create pg_statistic tuples for the relation, if we have
-			 * gathered the right data.  del_stats() previously deleted
-			 * all the pg_statistic tuples for the rel, so we just have to
-			 * insert new ones here.
+			 * Collapse out the MCV items from the values[] array.
 			 *
-			 * Note analyze_rel() has seen to it that we won't come here when
-			 * vacuuming pg_statistic itself.
+			 * Note we destroy the values[] array here... but we don't need
+			 * it for anything more.  We do, however, still need values_cnt.
 			 */
-			if (VacAttrStatsLtGtValid(stats) && stats->initialized)
+			if (num_mcv > 0)
 			{
-				float4		nullratio;
-				float4		bestratio;
-				FmgrInfo	out_function;
-				char	   *out_string;
-				double		best_cnt_d = stats->best_cnt,
-							null_cnt_d = stats->null_cnt,
-							nonnull_cnt_d = stats->nonnull_cnt; /* prevent overflow */
-				Datum		values[Natts_pg_statistic];
-				char		nulls[Natts_pg_statistic];
-				Relation	irelations[Num_pg_statistic_indices];
+				int		src,
+						dest;
+				int		j;
 
-				nullratio = null_cnt_d / (nonnull_cnt_d + null_cnt_d);
-				bestratio = best_cnt_d / (nonnull_cnt_d + null_cnt_d);
-
-				fmgr_info(stats->outfunc, &out_function);
+				src = dest = 0;
+				j = 0;			/* index of next interesting MCV item */
+				while (src < values_cnt)
+				{
+					int		ncopy;
+
+					if (j < num_mcv)
+					{
+						int		first = track[j].first;
+
+						if (src >= first)
+						{
+							/* advance past this MCV item */
+							src = first + track[j].count;
+							j++;
+							continue;
+						}
+						ncopy = first - src;
+					}
+					else
+					{
+						ncopy = values_cnt - src;
+					}
+					memmove(&values[dest], &values[src],
+							ncopy * sizeof(ScalarItem));
+					src += ncopy;
+					dest += ncopy;
+				}
+				nvals = dest;
+			}
+			else
+				nvals = values_cnt;
+			Assert(nvals >= num_hist);
 
-				for (i = 0; i < Natts_pg_statistic; ++i)
-					nulls[i] = ' ';
+			/* Must copy the target values into TransactionCommandContext */
+			old_context = MemoryContextSwitchTo(TransactionCommandContext);
+			hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
+			for (i = 0; i < num_hist; i++)
+			{
+				int		pos;
 
-				/*
-				 * initialize values[]
-				 */
-				i = 0;
-				values[i++] = ObjectIdGetDatum(relid);	/* starelid */
-				values[i++] = Int16GetDatum(attp->attnum);		/* staattnum */
-				values[i++] = ObjectIdGetDatum(stats->op_cmplt);		/* staop */
-				values[i++] = Float4GetDatum(nullratio);		/* stanullfrac */
-				values[i++] = Float4GetDatum(bestratio);		/* stacommonfrac */
-				out_string = DatumGetCString(FunctionCall3(&out_function,
-														   stats->best,
-										ObjectIdGetDatum(stats->typelem),
-								 Int32GetDatum(stats->attr->atttypmod)));
-				values[i++] = DirectFunctionCall1(textin,		/* stacommonval */
-											CStringGetDatum(out_string));
-				pfree(out_string);
-				out_string = DatumGetCString(FunctionCall3(&out_function,
-														   stats->min,
-										ObjectIdGetDatum(stats->typelem),
-								 Int32GetDatum(stats->attr->atttypmod)));
-				values[i++] = DirectFunctionCall1(textin,		/* staloval */
-											CStringGetDatum(out_string));
-				pfree(out_string);
-				out_string = DatumGetCString(FunctionCall3(&out_function,
-														   stats->max,
-										ObjectIdGetDatum(stats->typelem),
-								 Int32GetDatum(stats->attr->atttypmod)));
-				values[i++] = DirectFunctionCall1(textin,		/* stahival */
-											CStringGetDatum(out_string));
-				pfree(out_string);
-
-				stup = heap_formtuple(sd->rd_att, values, nulls);
-
-				/* store tuple and update indexes too */
-				heap_insert(sd, stup);
-
-				CatalogOpenIndices(Num_pg_statistic_indices, Name_pg_statistic_indices, irelations);
-				CatalogIndexInsert(irelations, Num_pg_statistic_indices, sd, stup);
-				CatalogCloseIndices(Num_pg_statistic_indices, irelations);
-
-				/* release allocated space */
-				pfree(DatumGetPointer(values[Anum_pg_statistic_stacommonval - 1]));
-				pfree(DatumGetPointer(values[Anum_pg_statistic_staloval - 1]));
-				pfree(DatumGetPointer(values[Anum_pg_statistic_stahival - 1]));
-				heap_freetuple(stup);
+				pos = (i * (nvals - 1)) / (num_hist - 1);
+				hist_values[i] = datumCopy(values[pos].value,
+										   stats->attr->attbyval,
+										   stats->attr->attlen);
 			}
+			MemoryContextSwitchTo(old_context);
+
+			stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM;
+			stats->staop[slot_idx] = stats->ltopr;
+			stats->stavalues[slot_idx] = hist_values;
+			stats->numvalues[slot_idx] = num_hist;
+			slot_idx++;
+		}
+
+		/* Generate a correlation entry if there are multiple values */
+		if (values_cnt > 1)
+		{
+			MemoryContext old_context;
+			float4 *corrs;
+			double	corr_xsum,
+					corr_x2sum;
+
+			/* Must allocate the result array in TransactionCommandContext */
+			old_context = MemoryContextSwitchTo(TransactionCommandContext);
+			corrs = (float4 *) palloc(sizeof(float4));
+			MemoryContextSwitchTo(old_context);
+
+			/*----------
+			 * Since we know the x and y value sets are both
+			 *		0, 1, ..., values_cnt-1
+			 * we have sum(x) = sum(y) =
+			 *		(values_cnt-1)*values_cnt / 2
+			 * and sum(x^2) = sum(y^2) =
+			 *		(values_cnt-1)*values_cnt*(2*values_cnt-1) / 6.
+			 *----------
+			 */
+			corr_xsum = (double) (values_cnt-1) * (double) values_cnt / 2.0;
+			corr_x2sum = (double) (values_cnt-1) * (double) values_cnt *
+				(double) (2*values_cnt-1) / 6.0;
+			/* And the correlation coefficient reduces to */
+			corrs[0] = (values_cnt * corr_xysum - corr_xsum * corr_xsum) /
+				(values_cnt * corr_x2sum - corr_xsum * corr_xsum);
+
+			stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION;
+			stats->staop[slot_idx] = stats->ltopr;
+			stats->stanumbers[slot_idx] = corrs;
+			stats->numnumbers[slot_idx] = 1;
+			slot_idx++;
 		}
 	}
-	heap_endscan(scan);
-	/* close rels, but hold locks till upcoming commit */
-	heap_close(ad, NoLock);
-	heap_close(sd, NoLock);
+
+	/* We don't need to bother cleaning up any of our temporary palloc's */
 }
 
 /*
- *	del_stats() -- delete pg_statistic rows for a relation
+ * qsort comparator for sorting ScalarItems
  *
- *	If a list of attribute numbers is given, only zap stats for those attrs.
+ * Aside from sorting the items, we update the datumCmpTupnoLink[] array
+ * whenever two ScalarItems are found to contain equal datums.  The array
+ * is indexed by tupno; for each ScalarItem, it holds the highest tupno
+ * (its own included) whose datum has compared equal to that item's datum.
+ * This lets compute_scalar_stats() avoid additional comparisons.
  */
-static void
-del_stats(Oid relid, int attcnt, int *attnums)
+static int
+compare_scalars(const void *a, const void *b)
 {
-	Relation	pgstatistic;
-	HeapScanDesc scan;
-	HeapTuple	tuple;
-	ScanKeyData key;
+	Datum		da = ((ScalarItem *) a)->value;
+	int			ta = ((ScalarItem *) a)->tupno;
+	Datum		db = ((ScalarItem *) b)->value;
+	int			tb = ((ScalarItem *) b)->tupno;
 
-	pgstatistic = heap_openr(StatisticRelationName, RowExclusiveLock);
+	if (datumCmpFnKind == SORTFUNC_LT)
+	{
+		if (DatumGetBool(FunctionCall2(datumCmpFn, da, db)))
+			return -1;			/* a < b */
+		if (DatumGetBool(FunctionCall2(datumCmpFn, db, da)))
+			return 1;			/* a > b */
+	}
+	else
+	{
+		/* sort function is CMP or REVCMP */
+		int32	compare;
 
-	ScanKeyEntryInitialize(&key, 0x0, Anum_pg_statistic_starelid,
-						   F_OIDEQ, ObjectIdGetDatum(relid));
-	scan = heap_beginscan(pgstatistic, false, SnapshotNow, 1, &key);
+		compare = DatumGetInt32(FunctionCall2(datumCmpFn, da, db));
+		if (compare != 0)
+		{
+			if (datumCmpFnKind == SORTFUNC_REVCMP)
+				compare = -compare;
+			return compare;
+		}
+	}
 
-	while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+	/*
+	 * The two datums are equal, so update datumCmpTupnoLink[].
+	 */
+	if (datumCmpTupnoLink[ta] < tb)
+		datumCmpTupnoLink[ta] = tb;
+	if (datumCmpTupnoLink[tb] < ta)
+		datumCmpTupnoLink[tb] = ta;
+
+	/*
+	 * For equal datums, sort by tupno
+	 */
+	return ta - tb;
+}
+
+/*
+ * qsort comparator for sorting ScalarMCVItems by position
+ */
+static int
+compare_mcvs(const void *a, const void *b)
+{
+	int			da = ((ScalarMCVItem *) a)->first;
+	int			db = ((ScalarMCVItem *) b)->first;
+
+	return da - db;
+}
+
+
+/*
+ *	update_attstats() -- update attribute statistics for one relation
+ *
+ *		Statistics are stored in several places: the pg_class row for the
+ *		relation has stats about the whole relation, and there is a
+ *		pg_statistic row for each (non-system) attribute that has ever
+ *		been analyzed.  The pg_class values are updated by VACUUM, not here.
+ *
+ *		pg_statistic rows are just added or updated normally.  This means
+ *		that pg_statistic will probably contain some deleted rows at the
+ *		completion of a vacuum cycle, unless it happens to get vacuumed last.
+ *
+ *		To keep things simple, we punt for pg_statistic, and don't try
+ *		to compute or store rows for pg_statistic itself in pg_statistic.
+ *		This could possibly be made to work, but it's not worth the trouble.
+ *		Note analyze_rel() has seen to it that we won't come here when
+ *		vacuuming pg_statistic itself.
+ */
+static void
+update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats)
+{
+	Relation	sd;
+	int			attno;
+
+	/*
+	 * We use an ExclusiveLock on pg_statistic to ensure that only one
+	 * backend is writing it at a time --- without that, we might have to
+	 * deal with concurrent updates here, and it's not worth the trouble.
+	 */
+	sd = heap_openr(StatisticRelationName, ExclusiveLock);
+
+	for (attno = 0; attno < natts; attno++)
 	{
-		if (attcnt > 0)
+		VacAttrStats *stats = vacattrstats[attno];
+		FmgrInfo	out_function;
+		HeapTuple	stup,
+					oldtup;
+		int			i, k, n;
+		Datum		values[Natts_pg_statistic];
+		char		nulls[Natts_pg_statistic];
+		char		replaces[Natts_pg_statistic];
+		Relation	irelations[Num_pg_statistic_indices];
+
+		/* Ignore attr if we weren't able to collect stats */
+		if (!stats->stats_valid)
+			continue;
+
+		fmgr_info(stats->attrtype->typoutput, &out_function);
+
+		/*
+		 * Construct a new pg_statistic tuple
+		 */
+		for (i = 0; i < Natts_pg_statistic; ++i)
 		{
-			Form_pg_statistic pgs = (Form_pg_statistic) GETSTRUCT(tuple);
-			int			i;
+			nulls[i] = ' ';
+			replaces[i] = 'r';
+		}
 
-			for (i = 0; i < attcnt; i++)
+		i = 0;
+		values[i++] = ObjectIdGetDatum(relid); /* starelid */
+		values[i++] = Int16GetDatum(stats->attnum); /* staattnum */
+		values[i++] = Float4GetDatum(stats->stanullfrac); /* stanullfrac */
+		values[i++] = Int32GetDatum(stats->stawidth); /* stawidth */
+		values[i++] = Float4GetDatum(stats->stadistinct); /* stadistinct */
+		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+		{
+			values[i++] = Int16GetDatum(stats->stakind[k]);	/* stakindN */
+		}
+		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+		{
+			values[i++] = ObjectIdGetDatum(stats->staop[k]); /* staopN */
+		}
+		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+		{
+			int		nnum = stats->numnumbers[k];
+
+			if (nnum > 0)
 			{
-				if (pgs->staattnum == attnums[i] + 1)
-					break;
+				Datum	   *numdatums = (Datum *) palloc(nnum * sizeof(Datum));
+				ArrayType  *arry;
+
+				for (n = 0; n < nnum; n++)
+					numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);
+				/* XXX knows more than it should about type float4: */
+				arry = construct_array(numdatums, nnum,
+									   false, sizeof(float4), 'i');
+				values[i++] = PointerGetDatum(arry); /* stanumbersN */
+			}
+			else
+			{
+				nulls[i] = 'n';
+				values[i++] = (Datum) 0;
 			}
-			if (i >= attcnt)
-				continue;		/* don't delete it */
 		}
-		simple_heap_delete(pgstatistic, &tuple->t_self);
-	}
+		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+		{
+			int		ntxt = stats->numvalues[k];
 
-	heap_endscan(scan);
+			if (ntxt > 0)
+			{
+				Datum	   *txtdatums = (Datum *) palloc(ntxt * sizeof(Datum));
+				ArrayType  *arry;
 
-	/*
-	 * Close rel, but *keep* lock; we will need to reacquire it later, so
-	 * there's a possibility of deadlock against another VACUUM process if
-	 * we let go now.  Keeping the lock shouldn't delay any common
-	 * operation other than an attempted VACUUM of pg_statistic itself.
-	 */
-	heap_close(pgstatistic, NoLock);
+				for (n = 0; n < ntxt; n++)
+				{
+					/*
+					 * Convert data values to a text string to be inserted
+					 * into the text array.
+					 */
+					Datum	stringdatum;
+
+					stringdatum =
+						FunctionCall3(&out_function,
+									  stats->stavalues[k][n],
+									  ObjectIdGetDatum(stats->attrtype->typelem),
+									  Int32GetDatum(stats->attr->atttypmod));
+					txtdatums[n] = DirectFunctionCall1(textin, stringdatum);
+					pfree(DatumGetPointer(stringdatum));
+				}
+				/* XXX knows more than it should about type text: */
+				arry = construct_array(txtdatums, ntxt,
+									   false, -1, 'i');
+				values[i++] = PointerGetDatum(arry); /* stavaluesN */
+			}
+			else
+			{
+				nulls[i] = 'n';
+				values[i++] = (Datum) 0;
+			}
+		}
+
+		/* Is there already a pg_statistic tuple for this attribute? */
+		oldtup = SearchSysCache(STATRELATT,
+								ObjectIdGetDatum(relid),
+								Int16GetDatum(stats->attnum),
+								0, 0);
+
+		if (HeapTupleIsValid(oldtup))
+		{
+			/* Yes, replace it */
+			stup = heap_modifytuple(oldtup,
+									sd,
+									values,
+									nulls,
+									replaces);
+			ReleaseSysCache(oldtup);
+			simple_heap_update(sd, &stup->t_self, stup);
+		}
+		else
+		{
+			/* No, insert new tuple */
+			stup = heap_formtuple(sd->rd_att, values, nulls);
+			heap_insert(sd, stup);
+		}
+
+		/* update indices too */
+		CatalogOpenIndices(Num_pg_statistic_indices, Name_pg_statistic_indices,
+						   irelations);
+		CatalogIndexInsert(irelations, Num_pg_statistic_indices, sd, stup);
+		CatalogCloseIndices(Num_pg_statistic_indices, irelations);
+
+		heap_freetuple(stup);
+	}
+
+	/* close rel, but hold lock till upcoming commit */
+	heap_close(sd, NoLock);
 }
diff --git a/src/backend/commands/command.c b/src/backend/commands/command.c
index 96d493688e3..13a78f11773 100644
--- a/src/backend/commands/command.c
+++ b/src/backend/commands/command.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/commands/Attic/command.c,v 1.125 2001/03/23 04:49:52 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/commands/Attic/command.c,v 1.126 2001/05/07 00:43:17 tgl Exp $
  *
  * NOTES
  *	  The PerformAddAttribute() code, like most of the relation
@@ -56,6 +56,7 @@
 #include "access/genam.h"
 
 
+static void drop_default(Oid relid, int16 attnum);
 static bool needs_toast_table(Relation rel);
 static bool is_relation(char *name);
 
@@ -408,7 +409,7 @@ AlterTableAddColumn(const char *relationName,
 		HeapTuple	typeTuple;
 		Form_pg_type tform;
 		char	   *typename;
-		int			attnelems;
+		int			attndims;
 
 		if (SearchSysCacheExists(ATTNAME,
 								 ObjectIdGetDatum(reltup->t_data->t_oid),
@@ -425,11 +426,11 @@ AlterTableAddColumn(const char *relationName,
 
 		if (colDef->typename->arrayBounds)
 		{
-			attnelems = length(colDef->typename->arrayBounds);
+			attndims = length(colDef->typename->arrayBounds);
 			typename = makeArrayTypeName(colDef->typename->name);
 		}
 		else
-			attnelems = 0;
+			attndims = 0;
 
 		typeTuple = SearchSysCache(TYPENAME,
 								   PointerGetDatum(typename),
@@ -441,12 +442,12 @@ AlterTableAddColumn(const char *relationName,
 		namestrcpy(&(attribute->attname), colDef->colname);
 		attribute->atttypid = typeTuple->t_data->t_oid;
 		attribute->attlen = tform->typlen;
-		attribute->attdispersion = 0;
+		attribute->attstattarget = DEFAULT_ATTSTATTARGET;
 		attribute->attcacheoff = -1;
 		attribute->atttypmod = colDef->typename->typmod;
 		attribute->attnum = i;
 		attribute->attbyval = tform->typbyval;
-		attribute->attnelems = attnelems;
+		attribute->attndims = attndims;
 		attribute->attisset = (bool) (tform->typtype == 'c');
 		attribute->attstorage = tform->typstorage;
 		attribute->attalign = tform->typalign;
@@ -496,17 +497,13 @@ AlterTableAddColumn(const char *relationName,
 }
 
 
-
-static void drop_default(Oid relid, int16 attnum);
-
-
 /*
  * ALTER TABLE ALTER COLUMN SET/DROP DEFAULT
  */
 void
-AlterTableAlterColumn(const char *relationName,
-					  bool inh, const char *colName,
-					  Node *newDefault)
+AlterTableAlterColumnDefault(const char *relationName,
+							 bool inh, const char *colName,
+							 Node *newDefault)
 {
 	Relation	rel;
 	HeapTuple	tuple;
@@ -551,8 +548,8 @@ AlterTableAlterColumn(const char *relationName,
 			if (childrelid == myrelid)
 				continue;
 			rel = heap_open(childrelid, AccessExclusiveLock);
-			AlterTableAlterColumn(RelationGetRelationName(rel),
-								  false, colName, newDefault);
+			AlterTableAlterColumnDefault(RelationGetRelationName(rel),
+										 false, colName, newDefault);
 			heap_close(rel, AccessExclusiveLock);
 		}
 	}
@@ -560,7 +557,7 @@ AlterTableAlterColumn(const char *relationName,
 	/* -= now do the thing on this relation =- */
 
 	/* reopen the business */
-	rel = heap_openr((char *) relationName, AccessExclusiveLock);
+	rel = heap_openr(relationName, AccessExclusiveLock);
 
 	/*
 	 * get the number of the attribute
@@ -647,7 +644,6 @@ AlterTableAlterColumn(const char *relationName,
 }
 
 
-
 static void
 drop_default(Oid relid, int16 attnum)
 {
@@ -675,6 +671,104 @@ drop_default(Oid relid, int16 attnum)
 }
 
 
+/*
+ * ALTER TABLE ALTER COLUMN SET STATISTICS
+ */
+void
+AlterTableAlterColumnStatistics(const char *relationName,
+								bool inh, const char *colName,
+								Node *statsTarget)
+{
+	Relation	rel;
+	Oid			myrelid;
+	int			newtarget;
+	Relation	attrelation;
+	HeapTuple	tuple;
+
+#ifndef NO_SECURITY
+	if (!pg_ownercheck(GetUserId(), relationName, RELNAME))
+		elog(ERROR, "ALTER TABLE: permission denied");
+#endif
+
+	rel = heap_openr(relationName, AccessExclusiveLock);
+	if (rel->rd_rel->relkind != RELKIND_RELATION)
+		elog(ERROR, "ALTER TABLE: relation \"%s\" is not a table",
+			 relationName);
+	myrelid = RelationGetRelid(rel);
+	heap_close(rel, NoLock);	/* close rel, but keep lock! */
+
+	/*
+	 * Propagate to children if desired
+	 */
+	if (inh)
+	{
+		List	   *child,
+				   *children;
+
+		/* find_all_inheritors is actually in the planner */
+		children = find_all_inheritors(myrelid);
+
+		/*
+		 * find_all_inheritors does the recursive search of the
+		 * inheritance hierarchy, so all we have to do is process all of
+		 * the relids in the list that it returns.
+		 */
+		foreach(child, children)
+		{
+			Oid			childrelid = lfirsti(child);
+
+			if (childrelid == myrelid)
+				continue;
+			rel = heap_open(childrelid, AccessExclusiveLock);
+			AlterTableAlterColumnStatistics(RelationGetRelationName(rel),
+											false, colName, statsTarget);
+			heap_close(rel, AccessExclusiveLock);
+		}
+	}
+
+	/* -= now do the thing on this relation =- */
+
+	Assert(IsA(statsTarget, Integer));
+	newtarget = intVal(statsTarget);
+
+	/* Limit target to sane range (should we raise an error instead?) */
+	if (newtarget < 0)
+		newtarget = 0;
+	else if (newtarget > 1000)
+		newtarget = 1000;
+
+	attrelation = heap_openr(AttributeRelationName, RowExclusiveLock);
+
+	tuple = SearchSysCacheCopy(ATTNAME,
+							   ObjectIdGetDatum(myrelid),
+							   PointerGetDatum(colName),
+							   0, 0);
+	if (!HeapTupleIsValid(tuple))
+		elog(ERROR, "ALTER TABLE: relation \"%s\" has no column \"%s\"",
+			 relationName, colName);
+
+	if (((Form_pg_attribute) GETSTRUCT(tuple))->attnum < 0)
+		elog(ERROR, "ALTER TABLE: cannot change system attribute \"%s\"",
+			 colName);
+
+	((Form_pg_attribute) GETSTRUCT(tuple))->attstattarget = newtarget;
+
+	simple_heap_update(attrelation, &tuple->t_self, tuple);
+
+	/* keep system catalog indices current */
+	{
+		Relation	irelations[Num_pg_attr_indices];
+
+		CatalogOpenIndices(Num_pg_attr_indices, Name_pg_attr_indices, irelations);
+		CatalogIndexInsert(irelations, Num_pg_attr_indices, attrelation, tuple);
+		CatalogCloseIndices(Num_pg_attr_indices, irelations);
+	}
+
+	heap_freetuple(tuple);
+	heap_close(attrelation, RowExclusiveLock);
+}
+
+
 #ifdef	_DROP_COLUMN_HACK__
 /*
  *	ALTER TABLE DROP COLUMN trial implementation
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 694d0e8bbc1..9a0dbdc8c8e 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.189 2001/03/25 23:23:58 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.190 2001/05/07 00:43:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,25 +53,90 @@ extern XLogRecPtr log_heap_move(Relation reln,
 			  Buffer oldbuf, ItemPointerData from,
 			  Buffer newbuf, HeapTuple newtup);
 
+
+typedef struct VRelListData
+{
+	Oid			vrl_relid;
+	struct VRelListData *vrl_next;
+} VRelListData;
+
+typedef VRelListData *VRelList;
+
+typedef struct VacPageData
+{
+	BlockNumber blkno;			/* BlockNumber of this Page */
+	Size		free;			/* FreeSpace on this Page */
+	uint16		offsets_used;	/* Number of OffNums used by vacuum */
+	uint16		offsets_free;	/* Number of OffNums free or to be free */
+	OffsetNumber offsets[1];	/* Array of its OffNums */
+} VacPageData;
+
+typedef VacPageData *VacPage;
+
+typedef struct VacPageListData
+{
+	int			empty_end_pages;/* Number of "empty" end-pages */
+	int			num_pages;		/* Number of pages in pagedesc */
+	int			num_allocated_pages;	/* Number of allocated pages in
+										 * pagedesc */
+	VacPage    *pagedesc;		/* Descriptions of pages */
+} VacPageListData;
+
+typedef VacPageListData *VacPageList;
+
+typedef struct VTupleLinkData
+{
+	ItemPointerData new_tid;
+	ItemPointerData this_tid;
+} VTupleLinkData;
+
+typedef VTupleLinkData *VTupleLink;
+
+typedef struct VTupleMoveData
+{
+	ItemPointerData tid;		/* tuple ID */
+	VacPage		vacpage;		/* where to move */
+	bool		cleanVpd;		/* clean vacpage before using */
+} VTupleMoveData;
+
+typedef VTupleMoveData *VTupleMove;
+
+typedef struct VRelStats
+{
+	Oid			relid;
+	long		num_pages;
+	long		num_tuples;
+	Size		min_tlen;
+	Size		max_tlen;
+	bool		hasindex;
+	int			num_vtlinks;
+	VTupleLink	vtlinks;
+} VRelStats;
+
+
 static MemoryContext vac_context = NULL;
 
 static int	MESSAGE_LEVEL;		/* message level */
 
 static TransactionId XmaxRecent;
 
+
 /* non-export function prototypes */
 static void vacuum_init(void);
 static void vacuum_shutdown(void);
-static void vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2);
-static VRelList getrels(NameData *VacRelP);
+static VRelList getrels(Name VacRelP, const char *stmttype);
 static void vacuum_rel(Oid relid);
-static void scan_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages);
-static void repair_frag(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages, int nindices, Relation *Irel);
-static void vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacpagelist);
+static void scan_heap(VRelStats *vacrelstats, Relation onerel,
+					  VacPageList vacuum_pages, VacPageList fraged_pages);
+static void repair_frag(VRelStats *vacrelstats, Relation onerel,
+						VacPageList vacuum_pages, VacPageList fraged_pages,
+						int nindices, Relation *Irel);
+static void vacuum_heap(VRelStats *vacrelstats, Relation onerel,
+						VacPageList vacpagelist);
 static void vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage);
-static void vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_tuples);
-static void scan_index(Relation indrel, int num_tuples);
-static void update_relstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *vacrelstats);
+static void vacuum_index(VacPageList vacpagelist, Relation indrel,
+						 long num_tuples, int keep_tuples);
+static void scan_index(Relation indrel, long num_tuples);
 static VacPage tid_reaped(ItemPointer itemptr, VacPageList vacpagelist);
 static void reap_page(VacPageList vacpagelist, VacPage vacpage);
 static void vpage_insert(VacPageList vacpagelist, VacPage vpnew);
@@ -88,17 +153,17 @@ static bool enough_space(VacPage vacpage, Size len);
 static char *show_rusage(struct rusage * ru0);
 
 
+/*
+ * Primary entry point for VACUUM and ANALYZE commands.
+ */
 void
-vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
+vacuum(VacuumStmt *vacstmt)
 {
+	const char *stmttype = vacstmt->vacuum ? "VACUUM" : "ANALYZE";
 	NameData	VacRel;
 	Name		VacRelName;
-	MemoryContext old;
-	List	   *le;
-	List	   *anal_cols2 = NIL;
-
-	if (anal_cols != NIL && !analyze)
-		elog(ERROR, "Can't vacuum columns, only tables.  You can 'vacuum analyze' columns.");
+	VRelList	vrl,
+				cur;
 
 	/*
 	 * We cannot run VACUUM inside a user transaction block; if we were
@@ -110,9 +175,9 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
 	 * behavior.
 	 */
 	if (IsTransactionBlock())
-		elog(ERROR, "VACUUM cannot run inside a BEGIN/END block");
+		elog(ERROR, "%s cannot run inside a BEGIN/END block", stmttype);
 
-	if (verbose)
+	if (vacstmt->verbose)
 		MESSAGE_LEVEL = NOTICE;
 	else
 		MESSAGE_LEVEL = DEBUG;
@@ -130,37 +195,36 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
 										ALLOCSET_DEFAULT_INITSIZE,
 										ALLOCSET_DEFAULT_MAXSIZE);
 
-	/* vacrel gets de-allocated on xact commit, so copy it to safe storage */
-	if (vacrel)
+	/* Convert vacrel, which is just a string, to a Name */
+	if (vacstmt->vacrel)
 	{
-		namestrcpy(&VacRel, vacrel);
+		namestrcpy(&VacRel, vacstmt->vacrel);
 		VacRelName = &VacRel;
 	}
 	else
 		VacRelName = NULL;
 
-	/* must also copy the column list, if any, to safe storage */
-	old = MemoryContextSwitchTo(vac_context);
-	foreach(le, anal_cols)
-	{
-		char	   *col = (char *) lfirst(le);
-
-		anal_cols2 = lappend(anal_cols2, pstrdup(col));
-	}
-	MemoryContextSwitchTo(old);
+	/* Build list of relations to process (note this lives in vac_context) */
+	vrl = getrels(VacRelName, stmttype);
 
 	/*
 	 * Start up the vacuum cleaner.
-	 *
-	 * NOTE: since this commits the current transaction, the memory holding
-	 * any passed-in parameters gets freed here.  We must have already
-	 * copied pass-by-reference parameters to safe storage.  Don't make me
-	 * fix this again!
 	 */
 	vacuum_init();
 
-	/* vacuum the database */
-	vac_vacuum(VacRelName, analyze, anal_cols2);
+	/*
+	 * Process each selected relation.  We are careful to process
+	 * each relation in a separate transaction in order to avoid holding
+	 * too many locks at one time.
+	 */
+	for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next)
+	{
+		if (vacstmt->vacuum)
+			vacuum_rel(cur->vrl_relid);
+		/* analyze separately so locking is minimized */
+		if (vacstmt->analyze)
+			analyze_rel(cur->vrl_relid, vacstmt);
+	}
 
 	/* clean up */
 	vacuum_shutdown();
@@ -187,14 +251,14 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
  *		PostgresMain().
  */
 static void
-vacuum_init()
+vacuum_init(void)
 {
 	/* matches the StartTransaction in PostgresMain() */
 	CommitTransactionCommand();
 }
 
 static void
-vacuum_shutdown()
+vacuum_shutdown(void)
 {
 	/* on entry, we are not in a transaction */
 
@@ -223,34 +287,10 @@ vacuum_shutdown()
 }
 
 /*
- *	vac_vacuum() -- vacuum the database.
- *
- *		This routine builds a list of relations to vacuum, and then calls
- *		code that vacuums them one at a time.  We are careful to vacuum each
- *		relation in a separate transaction in order to avoid holding too many
- *		locks at one time.
+ * Build a list of VRelListData nodes, one for each relation to be processed
  */
-static void
-vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2)
-{
-	VRelList	vrl,
-				cur;
-
-	/* get list of relations */
-	vrl = getrels(VacRelP);
-
-	/* vacuum each heap relation */
-	for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next)
-	{
-		vacuum_rel(cur->vrl_relid);
-		/* analyze separately so locking is minimized */
-		if (analyze)
-			analyze_rel(cur->vrl_relid, anal_cols2, MESSAGE_LEVEL);
-	}
-}
-
 static VRelList
-getrels(NameData *VacRelP)
+getrels(Name VacRelP, const char *stmttype)
 {
 	Relation	rel;
 	TupleDesc	tupdesc;
@@ -262,12 +302,9 @@ getrels(NameData *VacRelP)
 	char	   *rname;
 	char		rkind;
 	bool		n;
-	bool		found = false;
 	ScanKeyData key;
 
-	StartTransactionCommand();
-
-	if (NameStr(*VacRelP))
+	if (VacRelP)
 	{
 
 		/*
@@ -287,6 +324,7 @@ getrels(NameData *VacRelP)
 	}
 	else
 	{
+		/* find all plain relations (relkind 'r') listed in pg_class */
 		ScanKeyEntryInitialize(&key, 0x0, Anum_pg_class_relkind,
 							   F_CHAREQ, CharGetDatum('r'));
 	}
@@ -300,21 +338,20 @@ getrels(NameData *VacRelP)
 
 	while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
 	{
-		found = true;
-
 		d = heap_getattr(tuple, Anum_pg_class_relname, tupdesc, &n);
-		rname = (char *) DatumGetPointer(d);
+		rname = (char *) DatumGetName(d);
 
 		d = heap_getattr(tuple, Anum_pg_class_relkind, tupdesc, &n);
 		rkind = DatumGetChar(d);
 
 		if (rkind != RELKIND_RELATION)
 		{
-			elog(NOTICE, "Vacuum: can not process indices, views and certain system tables");
+			elog(NOTICE, "%s: can not process indexes, views or special system tables",
+				 stmttype);
 			continue;
 		}
 
-		/* get a relation list entry for this guy */
+		/* Make a relation list entry for this guy */
 		if (vrl == (VRelList) NULL)
 			vrl = cur = (VRelList)
 				MemoryContextAlloc(vac_context, sizeof(VRelListData));
@@ -332,10 +369,8 @@ getrels(NameData *VacRelP)
 	heap_endscan(scan);
 	heap_close(rel, AccessShareLock);
 
-	if (!found)
-		elog(NOTICE, "Vacuum: table not found");
-
-	CommitTransactionCommand();
+	if (vrl == NULL)
+		elog(NOTICE, "%s: table not found", stmttype);
 
 	return vrl;
 }
@@ -432,7 +467,8 @@ vacuum_rel(Oid relid)
 	 */
 	vacrelstats = (VRelStats *) palloc(sizeof(VRelStats));
 	vacrelstats->relid = relid;
-	vacrelstats->num_pages = vacrelstats->num_tuples = 0;
+	vacrelstats->num_pages = 0;
+	vacrelstats->num_tuples = 0;
 	vacrelstats->hasindex = false;
 
 	GetXmaxRecent(&XmaxRecent);
@@ -457,8 +493,8 @@ vacuum_rel(Oid relid)
 		vacrelstats->hasindex = true;
 	else
 		vacrelstats->hasindex = false;
-#ifdef NOT_USED
 
+#ifdef NOT_USED
 	/*
 	 * reindex in VACUUM is dangerous under WAL. ifdef out until it
 	 * becomes safe.
@@ -528,9 +564,8 @@ vacuum_rel(Oid relid)
 	heap_close(onerel, NoLock);
 
 	/* update statistics in pg_class */
-	update_relstats(vacrelstats->relid, vacrelstats->num_pages,
-					vacrelstats->num_tuples, vacrelstats->hasindex,
-					vacrelstats);
+	vac_update_relstats(vacrelstats->relid, vacrelstats->num_pages,
+						vacrelstats->num_tuples, vacrelstats->hasindex);
 
 	/*
 	 * Complete the transaction and free all temporary memory used.
@@ -582,8 +617,8 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
 	char	   *relname;
 	VacPage		vacpage,
 				vp;
+	long		num_tuples;
 	uint32		tups_vacuumed,
-				num_tuples,
 				nkeep,
 				nunused,
 				ncrash,
@@ -913,7 +948,6 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
 	/* save stats in the rel list for use later */
 	vacrelstats->num_tuples = num_tuples;
 	vacrelstats->num_pages = nblocks;
-/*	  vacrelstats->natts = attr_cnt;*/
 	if (num_tuples == 0)
 		min_tlen = max_tlen = 0;
 	vacrelstats->min_tlen = min_tlen;
@@ -960,7 +994,7 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
 	}
 
 	elog(MESSAGE_LEVEL, "Pages %u: Changed %u, reaped %u, Empty %u, New %u; \
-Tup %u: Vac %u, Keep/VTL %u/%u, Crash %u, UnUsed %u, MinLen %lu, MaxLen %lu; \
+Tup %lu: Vac %u, Keep/VTL %u/%u, Crash %u, UnUsed %u, MinLen %lu, MaxLen %lu; \
 Re-using: Free/Avail. Space %lu/%lu; EndEmpty/Avail. Pages %u/%u. %s",
 		 nblocks, changed_pages, vacuum_pages->num_pages, empty_pages,
 		 new_pages, num_tuples, tups_vacuumed,
@@ -2009,7 +2043,7 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
 {
 	Buffer		buf;
 	VacPage    *vacpage;
-	int			nblocks;
+	long		nblocks;
 	int			i;
 
 	nblocks = vacuum_pages->num_pages;
@@ -2044,7 +2078,7 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
 	/* truncate relation if there are some empty end-pages */
 	if (vacuum_pages->empty_end_pages > 0)
 	{
-		elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u.",
+		elog(MESSAGE_LEVEL, "Rel %s: Pages: %lu --> %lu.",
 			 RelationGetRelationName(onerel),
 			 vacrelstats->num_pages, nblocks);
 		nblocks = smgrtruncate(DEFAULT_SMGR, onerel, nblocks);
@@ -2094,11 +2128,11 @@ vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage)
  *
  */
 static void
-scan_index(Relation indrel, int num_tuples)
+scan_index(Relation indrel, long num_tuples)
 {
 	RetrieveIndexResult res;
 	IndexScanDesc iscan;
-	int			nitups;
+	long		nitups;
 	int			nipages;
 	struct rusage ru0;
 
@@ -2119,14 +2153,14 @@ scan_index(Relation indrel, int num_tuples)
 
 	/* now update statistics in pg_class */
 	nipages = RelationGetNumberOfBlocks(indrel);
-	update_relstats(RelationGetRelid(indrel), nipages, nitups, false, NULL);
+	vac_update_relstats(RelationGetRelid(indrel), nipages, nitups, false);
 
-	elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u. %s",
+	elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %lu. %s",
 		 RelationGetRelationName(indrel), nipages, nitups,
 		 show_rusage(&ru0));
 
 	if (nitups != num_tuples)
-		elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\
+		elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%lu) IS NOT THE SAME AS HEAP' (%lu).\
 \n\tRecreate the index.",
 			 RelationGetRelationName(indrel), nitups, num_tuples);
 
@@ -2145,13 +2179,14 @@ scan_index(Relation indrel, int num_tuples)
  *		pg_class.
  */
 static void
-vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_tuples)
+vacuum_index(VacPageList vacpagelist, Relation indrel,
+			 long num_tuples, int keep_tuples)
 {
 	RetrieveIndexResult res;
 	IndexScanDesc iscan;
 	ItemPointer heapptr;
 	int			tups_vacuumed;
-	int			num_index_tuples;
+	long		num_index_tuples;
 	int			num_pages;
 	VacPage		vp;
 	struct rusage ru0;
@@ -2196,15 +2231,16 @@ vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_
 
 	/* now update statistics in pg_class */
 	num_pages = RelationGetNumberOfBlocks(indrel);
-	update_relstats(RelationGetRelid(indrel), num_pages, num_index_tuples, false, NULL);
+	vac_update_relstats(RelationGetRelid(indrel),
+						num_pages, num_index_tuples, false);
 
-	elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u: Deleted %u. %s",
+	elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %lu: Deleted %u. %s",
 		 RelationGetRelationName(indrel), num_pages,
 		 num_index_tuples - keep_tuples, tups_vacuumed,
 		 show_rusage(&ru0));
 
 	if (num_index_tuples != num_tuples + keep_tuples)
-		elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\
+		elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%lu) IS NOT THE SAME AS HEAP' (%lu).\
 \n\tRecreate the index.",
 		  RelationGetRelationName(indrel), num_index_tuples, num_tuples);
 
@@ -2255,7 +2291,7 @@ tid_reaped(ItemPointer itemptr, VacPageList vacpagelist)
 }
 
 /*
- *	update_relstats() -- update statistics for one relation
+ *	vac_update_relstats() -- update statistics for one relation
  *
  *		Update the whole-relation statistics that are kept in its pg_class
  *		row.  There are additional stats that will be updated if we are
@@ -2268,13 +2304,12 @@ tid_reaped(ItemPointer itemptr, VacPageList vacpagelist)
  *		we updated these tuples in the usual way, vacuuming pg_class itself
  *		wouldn't work very well --- by the time we got done with a vacuum
  *		cycle, most of the tuples in pg_class would've been obsoleted.
- *		Updating pg_class's own statistics would be especially tricky.
  *		Of course, this only works for fixed-size never-null columns, but
  *		these are.
  */
-static void
-update_relstats(Oid relid, int num_pages, int num_tuples, bool hasindex,
-				VRelStats *vacrelstats)
+void
+vac_update_relstats(Oid relid, long num_pages, double num_tuples,
+					bool hasindex)
 {
 	Relation	rd;
 	HeapTupleData rtup;
diff --git a/src/backend/executor/nodeSort.c b/src/backend/executor/nodeSort.c
index 12c6f82a8b2..e0543a28109 100644
--- a/src/backend/executor/nodeSort.c
+++ b/src/backend/executor/nodeSort.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeSort.c,v 1.32 2001/03/22 06:16:13 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeSort.c,v 1.33 2001/05/07 00:43:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,24 +20,24 @@
 #include "utils/tuplesort.h"
 
 /* ----------------------------------------------------------------
- *		FormSortKeys(node)
+ *		ExtractSortKeys
  *
- *		Forms the structure containing information used to sort the relation.
+ *		Extract the sorting key information from the plan node.
  *
- *		Returns an array of ScanKeyData.
+ *		Returns two palloc'd arrays, one of sort operator OIDs and
+ *		one of attribute numbers.
  * ----------------------------------------------------------------
  */
-static ScanKey
-FormSortKeys(Sort *sortnode)
+static void
+ExtractSortKeys(Sort *sortnode,
+				Oid **sortOperators,
+				AttrNumber **attNums)
 {
-	ScanKey		sortkeys;
 	List	   *targetList;
-	List	   *tl;
 	int			keycount;
-	Resdom	   *resdom;
-	AttrNumber	resno;
-	Index		reskey;
-	Oid			reskeyop;
+	Oid		   *sortOps;
+	AttrNumber *attNos;
+	List	   *tl;
 
 	/*
 	 * get information from the node
@@ -46,36 +46,33 @@ FormSortKeys(Sort *sortnode)
 	keycount = sortnode->keycount;
 
 	/*
-	 * first allocate space for scan keys
+	 * first allocate space for results
 	 */
 	if (keycount <= 0)
-		elog(ERROR, "FormSortKeys: keycount <= 0");
-	sortkeys = (ScanKey) palloc(keycount * sizeof(ScanKeyData));
-	MemSet((char *) sortkeys, 0, keycount * sizeof(ScanKeyData));
+		elog(ERROR, "ExtractSortKeys: keycount <= 0");
+	sortOps = (Oid *) palloc(keycount * sizeof(Oid));
+	MemSet(sortOps, 0, keycount * sizeof(Oid));
+	*sortOperators = sortOps;
+	attNos = (AttrNumber *) palloc(keycount * sizeof(AttrNumber));
+	MemSet(attNos, 0, keycount * sizeof(AttrNumber));
+	*attNums = attNos;
 
 	/*
-	 * form each scan key from the resdom info in the target list
+	 * extract info from the resdom nodes in the target list
 	 */
 	foreach(tl, targetList)
 	{
 		TargetEntry *target = (TargetEntry *) lfirst(tl);
-
-		resdom = target->resdom;
-		resno = resdom->resno;
-		reskey = resdom->reskey;
-		reskeyop = resdom->reskeyop;
+		Resdom	   *resdom = target->resdom;
+		Index		reskey = resdom->reskey;
 
 		if (reskey > 0)			/* ignore TLEs that are not sort keys */
 		{
-			ScanKeyEntryInitialize(&sortkeys[reskey - 1],
-								   0x0,
-								   resno,
-								   (RegProcedure) reskeyop,
-								   (Datum) 0);
+			Assert(reskey <= keycount);
+			sortOps[reskey - 1] = resdom->reskeyop;
+			attNos[reskey - 1] = resdom->resno;
 		}
 	}
-
-	return sortkeys;
 }
 
 /* ----------------------------------------------------------------
@@ -124,8 +121,8 @@ ExecSort(Sort *node)
 	{
 		Plan	   *outerNode;
 		TupleDesc	tupDesc;
-		int			keycount;
-		ScanKey		sortkeys;
+		Oid		   *sortOperators;
+		AttrNumber *attNums;
 
 		SO1_printf("ExecSort: %s\n",
 				   "sorting subplan");
@@ -145,14 +142,17 @@ ExecSort(Sort *node)
 
 		outerNode = outerPlan((Plan *) node);
 		tupDesc = ExecGetTupType(outerNode);
-		keycount = node->keycount;
-		sortkeys = (ScanKey) sortstate->sort_Keys;
 
-		tuplesortstate = tuplesort_begin_heap(tupDesc, keycount, sortkeys,
-											  true /* randomAccess */ );
+		ExtractSortKeys(node, &sortOperators, &attNums);
 
+		tuplesortstate = tuplesort_begin_heap(tupDesc, node->keycount,
+											  sortOperators, attNums,
+											  true /* randomAccess */ );
 		sortstate->tuplesortstate = (void *) tuplesortstate;
 
+		pfree(sortOperators);
+		pfree(attNums);
+
 		/*
 		 * Scan the subplan and feed all the tuples to tuplesort.
 		 */
@@ -230,7 +230,6 @@ ExecInitSort(Sort *node, EState *estate, Plan *parent)
 	 */
 	sortstate = makeNode(SortState);
 	sortstate->sort_Done = false;
-	sortstate->sort_Keys = NULL;
 	sortstate->tuplesortstate = NULL;
 
 	node->sortstate = sortstate;
@@ -259,11 +258,6 @@ ExecInitSort(Sort *node, EState *estate, Plan *parent)
 	ExecInitNode(outerPlan, estate, (Plan *) node);
 
 	/*
-	 * initialize sortstate information
-	 */
-	sortstate->sort_Keys = FormSortKeys(node);
-
-	/*
 	 * initialize tuple type.  no need to initialize projection info
 	 * because this node doesn't do projections.
 	 */
@@ -321,9 +315,6 @@ ExecEndSort(Sort *node)
 		tuplesort_end((Tuplesortstate *) sortstate->tuplesortstate);
 	sortstate->tuplesortstate = NULL;
 
-	if (sortstate->sort_Keys != NULL)
-		pfree(sortstate->sort_Keys);
-
 	pfree(sortstate);
 	node->sortstate = NULL;
 
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index ad50630931e..ee5a803b802 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -15,7 +15,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.140 2001/03/22 06:16:14 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.141 2001/05/07 00:43:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1378,8 +1378,8 @@ _copyRestrictInfo(RestrictInfo *from)
 	newnode->left_pathkey = NIL;
 	newnode->right_pathkey = NIL;
 	newnode->hashjoinoperator = from->hashjoinoperator;
-	newnode->left_dispersion = from->left_dispersion;
-	newnode->right_dispersion = from->right_dispersion;
+	newnode->left_bucketsize = from->left_bucketsize;
+	newnode->right_bucketsize = from->right_bucketsize;
 
 	return newnode;
 }
@@ -2209,11 +2209,12 @@ _copyVacuumStmt(VacuumStmt *from)
 {
 	VacuumStmt *newnode = makeNode(VacuumStmt);
 
-	newnode->verbose = from->verbose;
+	newnode->vacuum = from->vacuum;
 	newnode->analyze = from->analyze;
+	newnode->verbose = from->verbose;
 	if (from->vacrel)
 		newnode->vacrel = pstrdup(from->vacrel);
-	Node_Copy(from, newnode, va_spec);
+	Node_Copy(from, newnode, va_cols);
 
 	return newnode;
 }
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index 06ee63bbacd..284a534aa96 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -20,7 +20,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.88 2001/03/22 03:59:31 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.89 2001/05/07 00:43:19 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -516,7 +516,7 @@ _equalRestrictInfo(RestrictInfo *a, RestrictInfo *b)
 		return false;
 
 	/*
-	 * ignore eval_cost, left/right_pathkey, and left/right_dispersion,
+	 * ignore eval_cost, left/right_pathkey, and left/right_bucketsize,
 	 * since they may not be set yet, and should be derivable from the
 	 * clause anyway
 	 */
@@ -1113,13 +1113,15 @@ _equalDropdbStmt(DropdbStmt *a, DropdbStmt *b)
 static bool
 _equalVacuumStmt(VacuumStmt *a, VacuumStmt *b)
 {
-	if (a->verbose != b->verbose)
+	if (a->vacuum != b->vacuum)
 		return false;
 	if (a->analyze != b->analyze)
 		return false;
+	if (a->verbose != b->verbose)
+		return false;
 	if (!equalstr(a->vacrel, b->vacrel))
 		return false;
-	if (!equal(a->va_spec, b->va_spec))
+	if (!equal(a->va_cols, b->va_cols))
 		return false;
 
 	return true;
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 9a071e7a250..4c0c1b03ef5 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.107 2001/03/22 03:59:32 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.108 2001/05/07 00:43:19 tgl Exp $
  *
  * NOTES
  *	  Most of the read functions for plan nodes are tested. (In fact, they
@@ -1874,11 +1874,11 @@ _readRestrictInfo(void)
 
 	/* eval_cost is not part of saved representation; compute on first use */
 	local_node->eval_cost = -1;
-	/* ditto for cached pathkeys and dispersion */
+	/* ditto for cached pathkeys and bucketsize */
 	local_node->left_pathkey = NIL;
 	local_node->right_pathkey = NIL;
-	local_node->left_dispersion = -1;
-	local_node->right_dispersion = -1;
+	local_node->left_bucketsize = -1;
+	local_node->right_bucketsize = -1;
 
 	return local_node;
 }
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index c52af72a16b..bdfbbb18186 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -41,7 +41,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.70 2001/04/25 22:04:37 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.71 2001/05/07 00:43:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -50,11 +50,15 @@
 
 #include <math.h>
 
+#include "catalog/pg_statistic.h"
 #include "executor/nodeHash.h"
 #include "miscadmin.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
+#include "optimizer/pathnode.h"
+#include "parser/parsetree.h"
 #include "utils/lsyscache.h"
+#include "utils/syscache.h"
 
 
 /*
@@ -573,7 +577,7 @@ cost_mergejoin(Path *path,
  * 'outer_path' is the path for the outer relation
  * 'inner_path' is the path for the inner relation
  * 'restrictlist' are the RestrictInfo nodes to be applied at the join
- * 'innerdispersion' is an estimate of the dispersion statistic
+ * 'innerbucketsize' is an estimate of the bucketsize fraction
  *				for the inner hash key.
  */
 void
@@ -581,7 +585,7 @@ cost_hashjoin(Path *path,
 			  Path *outer_path,
 			  Path *inner_path,
 			  List *restrictlist,
-			  Selectivity innerdispersion)
+			  Selectivity innerbucketsize)
 {
 	Cost		startup_cost = 0;
 	Cost		run_cost = 0;
@@ -607,22 +611,20 @@ cost_hashjoin(Path *path,
 
 	/*
 	 * The number of tuple comparisons needed is the number of outer
-	 * tuples times the typical hash bucket size.  nodeHash.c tries for
-	 * average bucket loading of NTUP_PER_BUCKET, but that goal will be
-	 * reached only if data values are uniformly distributed among the
-	 * buckets.  To be conservative, we scale up the target bucket size by
-	 * the number of inner rows times inner dispersion, giving an estimate
-	 * of the typical number of duplicates of each value. We then charge
-	 * one cpu_operator_cost per tuple comparison.
+	 * tuples times the typical number of tuples in a hash bucket,
+	 * which is the inner relation size times its bucketsize fraction.
+	 * We charge one cpu_operator_cost per tuple comparison.
 	 */
 	run_cost += cpu_operator_cost * outer_path->parent->rows *
-		NTUP_PER_BUCKET * ceil(inner_path->parent->rows * innerdispersion);
+		ceil(inner_path->parent->rows * innerbucketsize);
 
 	/*
 	 * Estimate the number of tuples that get through the hashing filter
 	 * as one per tuple in the two source relations.  This could be a
 	 * drastic underestimate if there are many equal-keyed tuples in
-	 * either relation, but we have no good way of estimating that...
+	 * either relation, but we have no simple way of estimating that;
+	 * and since this is only a second-order parameter, it's probably
+	 * not worth expending a lot of effort on the estimate.
 	 */
 	ntuples = outer_path->parent->rows + inner_path->parent->rows;
 
@@ -651,7 +653,7 @@ cost_hashjoin(Path *path,
 	/*
 	 * Bias against putting larger relation on inside.	We don't want an
 	 * absolute prohibition, though, since larger relation might have
-	 * better dispersion --- and we can't trust the size estimates
+	 * better bucketsize --- and we can't trust the size estimates
 	 * unreservedly, anyway.  Instead, inflate the startup cost by the
 	 * square root of the size ratio.  (Why square root?  No real good
 	 * reason, but it seems reasonable...)
@@ -663,6 +665,171 @@ cost_hashjoin(Path *path,
 	path->total_cost = startup_cost + run_cost;
 }
 
+/*
+ * Estimate hash bucketsize fraction (ie, number of entries in a bucket
+ * divided by total tuples in relation) if the specified Var is used
+ * as a hash key.
+ *
+ * This statistic is used by cost_hashjoin.  We split out the calculation
+ * because it's useful to cache the result for re-use across multiple path
+ * cost calculations.
+ *
+ * XXX This is really pretty bogus since we're effectively assuming that the
+ * distribution of hash keys will be the same after applying restriction
+ * clauses as it was in the underlying relation.  However, we are not nearly
+ * smart enough to figure out how the restrict clauses might change the
+ * distribution, so this will have to do for now.
+ *
+ * The executor tries for average bucket loading of NTUP_PER_BUCKET by setting
+ * number of buckets equal to ntuples / NTUP_PER_BUCKET, which would yield
+ * a bucketsize fraction of NTUP_PER_BUCKET / ntuples.  But that goal will
+ * be reached only if the data values are uniformly distributed among the
+ * buckets, which requires (a) at least ntuples / NTUP_PER_BUCKET distinct
+ * data values, and (b) a not-too-skewed data distribution.  Otherwise the
+ * buckets will be nonuniformly occupied.  If the other relation in the join
+ * has a similar distribution, the most-loaded buckets are exactly those
+ * that will be probed most often.  Therefore, the "average" bucket size for
+ * costing purposes should really be taken as something close to the "worst
+ * case" bucket size.  We try to estimate this by first scaling up if there
+ * are too few distinct data values, and then scaling up again by the
+ * ratio of the most common value's frequency to the average frequency.
+ *
+ * If no statistics are available, use a default estimate of 0.1.  This will
+ * discourage use of a hash rather strongly if the inner relation is large,
+ * which is what we want.  We do not want to hash unless we know that the
+ * inner rel is well-dispersed (or the alternatives seem much worse).
+ */
+Selectivity
+estimate_hash_bucketsize(Query *root, Var *var)
+{
+	Oid			relid;
+	RelOptInfo *rel;
+	HeapTuple	tuple;
+	Form_pg_statistic stats;
+	double		estfract,
+				ndistinct,
+				needdistinct,
+				mcvfreq,
+				avgfreq;
+	float4	   *numbers;
+	int			nnumbers;
+
+	/*
+	 * Lookup info about var's relation and attribute;
+	 * if none available, return default estimate.
+	 */
+	if (!IsA(var, Var))
+		return 0.1;				/* not a plain Var node: use the default */
+
+	relid = getrelid(var->varno, root->rtable);
+	if (relid == InvalidOid)
+		return 0.1;				/* no relation OID for this varno: use the default */
+
+	rel = get_base_rel(root, var->varno);
+
+	if (rel->tuples <= 0.0 || rel->rows <= 0.0)
+		return 0.1;				/* ensure we can divide below */
+
+	tuple = SearchSysCache(STATRELATT,
+						   ObjectIdGetDatum(relid),
+						   Int16GetDatum(var->varattno),
+						   0, 0);
+	if (!HeapTupleIsValid(tuple))
+	{
+		/*
+		 * Perhaps the Var is a system attribute; if so, it will have no
+		 * entry in pg_statistic, but we may be able to guess something
+		 * about its distribution anyway.
+		 */
+		switch (var->varattno)
+		{
+			case ObjectIdAttributeNumber:
+			case SelfItemPointerAttributeNumber:
+				/* these are unique, so buckets should be well-distributed */
+				return (double) NTUP_PER_BUCKET / rel->rows;
+			case TableOidAttributeNumber:
+				/* hashing this is a terrible idea... */
+				return 1.0;
+		}
+		return 0.1;				/* no stats and no special case applies: use the default */
+	}
+	stats = (Form_pg_statistic) GETSTRUCT(tuple);
+
+	/*
+	 * Obtain number of distinct data values in raw relation.
+	 */
+	ndistinct = stats->stadistinct;
+	if (ndistinct < 0.0)
+		ndistinct = -ndistinct * rel->tuples;	/* negative value is a multiplier of rel->tuples */
+
+	/*
+	 * Adjust ndistinct to account for restriction clauses.  Observe we are
+	 * assuming that the data distribution is affected uniformly by the
+	 * restriction clauses!
+	 *
+	 * XXX Possibly better way, but much more expensive: multiply by
+	 * selectivity of rel's restriction clauses that mention the target Var.
+	 */
+	ndistinct *= rel->rows / rel->tuples;
+
+	/*
+	 * Discourage use of hash join if there seem not to be very many distinct
+	 * data values.  The threshold here is somewhat arbitrary, as is the
+	 * fraction used to "discourage" the choice.
+	 */
+	if (ndistinct < 50.0)
+	{
+		ReleaseSysCache(tuple);
+		return 0.5;
+	}
+
+	/*
+	 * Form initial estimate of bucketsize fraction.  Here we use rel->rows,
+	 * ie the number of rows after applying restriction clauses, because
+	 * that's what the fraction will eventually be multiplied by in
+	 * cost_hashjoin.
+	 */
+	estfract = (double) NTUP_PER_BUCKET / rel->rows;
+
+	/*
+	 * Adjust estimated bucketsize if too few distinct values to fill
+	 * all the buckets.
+	 */
+	needdistinct = rel->rows / (double) NTUP_PER_BUCKET;
+	if (ndistinct < needdistinct)
+		estfract *= needdistinct / ndistinct;
+
+	/*
+	 * Look up the frequency of the most common value, if available.
+	 */
+	mcvfreq = 0.0;				/* stays zero if no MCV frequency is found below */
+
+	if (get_attstatsslot(tuple, var->vartype, var->vartypmod,
+						 STATISTIC_KIND_MCV, InvalidOid,
+						 NULL, NULL, &numbers, &nnumbers))
+	{
+		/*
+		 * The first MCV stat is for the most common value.
+		 */
+		if (nnumbers > 0)
+			mcvfreq = numbers[0];
+		free_attstatsslot(var->vartype, NULL, 0,
+						  numbers, nnumbers);
+	}
+
+	/*
+	 * Adjust estimated bucketsize upward to account for skewed distribution.
+	 */
+	avgfreq = (1.0 - stats->stanullfrac) / ndistinct;
+
+	if (avgfreq > 0.0 && mcvfreq > avgfreq)
+		estfract *= mcvfreq / avgfreq;
+
+	ReleaseSysCache(tuple);
+
+	return (Selectivity) estfract;	/* NOTE(review): not clamped here; the scalings above may push it past 1.0 -- confirm */
+}
+
 
 /*
  * cost_qual_eval
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index d41336ddcee..cd7cabd41de 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -8,15 +8,15 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.63 2001/04/15 00:48:17 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.64 2001/05/07 00:43:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
+#include "postgres.h"
+
 #include <sys/types.h>
 #include <math.h>
 
-#include "postgres.h"
-
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
@@ -45,7 +45,6 @@ static void hash_inner_and_outer(Query *root, RelOptInfo *joinrel,
 					 List *restrictlist, JoinType jointype);
 static Path *best_innerjoin(List *join_paths, List *outer_relid,
 			   JoinType jointype);
-static Selectivity estimate_dispersion(Query *root, Var *var);
 static List *select_mergejoin_clauses(RelOptInfo *joinrel,
 						 RelOptInfo *outerrel,
 						 RelOptInfo *innerrel,
@@ -722,7 +721,7 @@ hash_inner_and_outer(Query *root,
 		Expr	   *clause;
 		Var		   *left,
 				   *right;
-		Selectivity innerdispersion;
+		Selectivity innerbucketsize;
 		List	   *hashclauses;
 
 		if (restrictinfo->hashjoinoperator == InvalidOid)
@@ -742,34 +741,34 @@ hash_inner_and_outer(Query *root,
 
 		/*
 		 * Check if clause is usable with these sub-rels, find inner side,
-		 * estimate dispersion of inner var for costing purposes.
+		 * estimate bucketsize of inner var for costing purposes.
 		 *
 		 * Since we tend to visit the same clauses over and over when
-		 * planning a large query, we cache the dispersion estimates in
+		 * planning a large query, we cache the bucketsize estimates in
 		 * the RestrictInfo node to avoid repeated lookups of statistics.
 		 */
 		if (intMember(left->varno, outerrelids) &&
 			intMember(right->varno, innerrelids))
 		{
 			/* righthand side is inner */
-			innerdispersion = restrictinfo->right_dispersion;
-			if (innerdispersion < 0)
+			innerbucketsize = restrictinfo->right_bucketsize;
+			if (innerbucketsize < 0)
 			{
 				/* not cached yet */
-				innerdispersion = estimate_dispersion(root, right);
-				restrictinfo->right_dispersion = innerdispersion;
+				innerbucketsize = estimate_hash_bucketsize(root, right);
+				restrictinfo->right_bucketsize = innerbucketsize;
 			}
 		}
 		else if (intMember(left->varno, innerrelids) &&
 				 intMember(right->varno, outerrelids))
 		{
 			/* lefthand side is inner */
-			innerdispersion = restrictinfo->left_dispersion;
-			if (innerdispersion < 0)
+			innerbucketsize = restrictinfo->left_bucketsize;
+			if (innerbucketsize < 0)
 			{
 				/* not cached yet */
-				innerdispersion = estimate_dispersion(root, left);
-				restrictinfo->left_dispersion = innerdispersion;
+				innerbucketsize = estimate_hash_bucketsize(root, left);
+				restrictinfo->left_bucketsize = innerbucketsize;
 			}
 		}
 		else
@@ -790,7 +789,7 @@ hash_inner_and_outer(Query *root,
 									  innerrel->cheapest_total_path,
 									  restrictlist,
 									  hashclauses,
-									  innerdispersion));
+									  innerbucketsize));
 		if (outerrel->cheapest_startup_path != outerrel->cheapest_total_path)
 			add_path(joinrel, (Path *)
 					 create_hashjoin_path(joinrel,
@@ -799,7 +798,7 @@ hash_inner_and_outer(Query *root,
 										  innerrel->cheapest_total_path,
 										  restrictlist,
 										  hashclauses,
-										  innerdispersion));
+										  innerbucketsize));
 	}
 }
 
@@ -867,31 +866,6 @@ best_innerjoin(List *join_paths, Relids outer_relids, JoinType jointype)
 }
 
 /*
- * Estimate dispersion of the specified Var
- *
- * We use a default of 0.1 if we can't figure out anything better.
- * This will typically discourage use of a hash rather strongly,
- * if the inner relation is large.	We do not want to hash unless
- * we know that the inner rel is well-dispersed (or the alternatives
- * seem much worse).
- */
-static Selectivity
-estimate_dispersion(Query *root, Var *var)
-{
-	Oid			relid;
-
-	if (!IsA(var, Var))
-		return 0.1;
-
-	relid = getrelid(var->varno, root->rtable);
-
-	if (relid == InvalidOid)
-		return 0.1;
-
-	return (Selectivity) get_attdispersion(relid, var->varattno, 0.1);
-}
-
-/*
  * select_mergejoin_clauses
  *	  Select mergejoin clauses that are usable for a particular join.
  *	  Returns a list of RestrictInfo nodes for those clauses.
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 8c3b00289d3..2d264c46881 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,14 +10,14 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.104 2001/03/22 03:59:36 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.105 2001/05/07 00:43:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
-#include <sys/types.h>
-
 #include "postgres.h"
 
+#include <sys/types.h>
+
 #include "catalog/pg_index.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
@@ -1484,9 +1484,9 @@ make_sort_from_pathkeys(List *tlist, Plan *lefttree, List *pathkeys)
 		 */
 		if (resdom->reskey == 0)
 		{
-			/* OK, mark it as a sort key and set the sort operator regproc */
+			/* OK, mark it as a sort key and set the sort operator */
 			resdom->reskey = ++numsortkeys;
-			resdom->reskeyop = get_opcode(pathkey->sortop);
+			resdom->reskeyop = pathkey->sortop;
 		}
 	}
 
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index 7c3e15a8f88..5d67e02dacb 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -8,13 +8,14 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.59 2001/04/16 19:44:10 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.60 2001/05/07 00:43:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
+#include "postgres.h"
+
 #include <sys/types.h>
 
-#include "postgres.h"
 #include "catalog/pg_operator.h"
 #include "catalog/pg_type.h"
 #include "nodes/makefuncs.h"
@@ -348,8 +349,8 @@ distribute_qual_to_rels(Query *root, Node *clause,
 	restrictinfo->left_pathkey = NIL;	/* not computable yet */
 	restrictinfo->right_pathkey = NIL;
 	restrictinfo->hashjoinoperator = InvalidOid;
-	restrictinfo->left_dispersion = -1; /* not computed until needed */
-	restrictinfo->right_dispersion = -1;
+	restrictinfo->left_bucketsize = -1; /* not computed until needed */
+	restrictinfo->right_bucketsize = -1;
 
 	/*
 	 * Retrieve all relids and vars contained within the clause.
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index b2ab4600209..0aba4808c16 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.105 2001/04/30 19:24:47 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.106 2001/05/07 00:43:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1367,7 +1367,7 @@ make_groupplan(List *group_tlist,
 			{
 				/* OK, insert the ordering info needed by the executor. */
 				resdom->reskey = ++keyno;
-				resdom->reskeyop = get_opcode(grpcl->sortop);
+				resdom->reskeyop = grpcl->sortop;
 			}
 		}
 
@@ -1412,7 +1412,7 @@ make_sortplan(List *tlist, Plan *plannode, List *sortcls)
 		{
 			/* OK, insert the ordering info needed by the executor. */
 			resdom->reskey = ++keyno;
-			resdom->reskeyop = get_opcode(sortcl->sortop);
+			resdom->reskeyop = sortcl->sortop;
 		}
 	}
 
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 0b173466cf9..ede4159d970 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -14,7 +14,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.62 2001/03/27 18:02:19 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.63 2001/05/07 00:43:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -682,8 +682,8 @@ adjust_inherited_attrs_mutator(Node *node,
 		newinfo->eval_cost = -1;		/* reset this too */
 		newinfo->left_pathkey = NIL;	/* and these */
 		newinfo->right_pathkey = NIL;
-		newinfo->left_dispersion = -1;
-		newinfo->right_dispersion = -1;
+		newinfo->left_bucketsize = -1;
+		newinfo->right_bucketsize = -1;
 
 		return (Node *) newinfo;
 	}
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index cfba3ee395f..407c132b4f7 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,14 +8,14 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.71 2001/03/22 03:59:39 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.72 2001/05/07 00:43:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
-#include <math.h>
-
 #include "postgres.h"
 
+#include <math.h>
+
 #include "nodes/plannodes.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
@@ -559,7 +559,7 @@ create_mergejoin_path(RelOptInfo *joinrel,
  * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
  * 'hashclauses' is a list of the hash join clause (always a 1-element list)
  *		(this should be a subset of the restrict_clauses list)
- * 'innerdispersion' is an estimate of the dispersion of the inner hash key
+ * 'innerbucketsize' is an estimate of the bucketsize of the inner hash key
  *
  */
 HashPath   *
@@ -569,7 +569,7 @@ create_hashjoin_path(RelOptInfo *joinrel,
 					 Path *inner_path,
 					 List *restrict_clauses,
 					 List *hashclauses,
-					 Selectivity innerdispersion)
+					 Selectivity innerbucketsize)
 {
 	HashPath   *pathnode = makeNode(HashPath);
 
@@ -587,7 +587,7 @@ create_hashjoin_path(RelOptInfo *joinrel,
 				  outer_path,
 				  inner_path,
 				  restrict_clauses,
-				  innerdispersion);
+				  innerbucketsize);
 
 	return pathnode;
 }
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 4f711df203c..ee3523553e8 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -9,11 +9,10 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.64 2001/03/22 03:59:40 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.65 2001/05/07 00:43:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
-
 #include "postgres.h"
 
 #include <math.h>
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 4687a559962..76cc095bc4e 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- *	$Header: /cvsroot/pgsql/src/backend/parser/analyze.c,v 1.183 2001/03/22 06:16:15 momjian Exp $
+ *	$Header: /cvsroot/pgsql/src/backend/parser/analyze.c,v 1.184 2001/05/07 00:43:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2660,7 +2660,7 @@ transformForUpdate(Query *qry, List *forUpdate)
 		/* just the named tables */
 		foreach(l, forUpdate)
 		{
-			char	   *relname = lfirst(l);
+			char	   *relname = strVal(lfirst(l));
 
 			i = 0;
 			foreach(rt, qry->rtable)
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index bed0ce239a4..40c379aca51 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -11,7 +11,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.222 2001/05/01 01:36:10 thomas Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.223 2001/05/07 00:43:23 tgl Exp $
  *
  * HISTORY
  *	  AUTHOR			DATE			MAJOR EVENT
@@ -104,7 +104,6 @@ static void doNegateFloat(Value *v);
 	char				*str;
 	bool				boolean;
 	JoinType			jtype;
-	InhOption			inhOpt;
 	List				*list;
 	Node				*node;
 	Value				*value;
@@ -130,6 +129,7 @@ static void doNegateFloat(Value *v);
 
 %type <node>	stmt,
 		AlterGroupStmt, AlterSchemaStmt, AlterTableStmt, AlterUserStmt,
+		AnalyzeStmt,
 		ClosePortalStmt, ClusterStmt, CommentStmt, ConstraintsSetStmt,
 		CopyStmt, CreateAsStmt, CreateGroupStmt, CreatePLangStmt,
 		CreateSchemaStmt, CreateSeqStmt, CreateStmt, CreateTrigStmt,
@@ -147,7 +147,7 @@ static void doNegateFloat(Value *v);
 %type <node>	select_no_parens, select_with_parens, select_clause,
 				simple_select
 
-%type <node>    alter_column_action
+%type <node>    alter_column_default
 %type <ival>    drop_behavior
 
 %type <list>	createdb_opt_list, createdb_opt_item
@@ -185,7 +185,7 @@ static void doNegateFloat(Value *v);
 		OptTableElementList, OptInherit, definition, opt_distinct,
 		opt_with, func_args, func_args_list, func_as,
 		oper_argtypes, RuleActionList, RuleActionMulti,
-		opt_column_list, columnList, opt_va_list, va_list,
+		opt_column_list, columnList, opt_name_list,
 		sort_clause, sortby_list, index_params, index_list, name_list,
 		from_clause, from_list, opt_array_bounds,
 		expr_list, attrs, target_list, update_target_list,
@@ -210,9 +210,7 @@ static void doNegateFloat(Value *v);
 %type <node>	substr_from, substr_for
 
 %type <boolean>	opt_binary, opt_using, opt_instead, opt_cursor
-%type <boolean>	opt_with_copy, index_opt_unique, opt_verbose, opt_analyze
-
-%type <inhOpt>	opt_inh_star, opt_only
+%type <boolean>	opt_with_copy, index_opt_unique, opt_verbose, analyze_keyword
 
 %type <ival>	copy_dirn, direction, reindex_type, drop_type,
 		opt_column, event, comment_type, comment_cl,
@@ -350,7 +348,8 @@ static void doNegateFloat(Value *v);
 		NEW, NOCREATEDB, NOCREATEUSER, NONE, NOTHING, NOTIFY, NOTNULL,
 		OFFSET, OIDS, OPERATOR, OWNER, PASSWORD, PROCEDURAL,
 		REINDEX, RENAME, RESET, RETURNS, ROW, RULE,
-		SEQUENCE, SERIAL, SETOF, SHARE, SHOW, START, STATEMENT, STDIN, STDOUT, SYSID,
+		SEQUENCE, SERIAL, SETOF, SHARE, SHOW, START, STATEMENT,
+		STATISTICS, STDIN, STDOUT, SYSID,
 		TEMP, TEMPLATE, TOAST, TRUNCATE, TRUSTED, 
 		UNLISTEN, UNTIL, VACUUM, VALID, VERBOSE, VERSION
 
@@ -470,6 +469,7 @@ stmt :	AlterSchemaStmt
 		| CreatedbStmt
 		| DropdbStmt
 		| VacuumStmt
+		| AnalyzeStmt
 		| VariableSetStmt
 		| VariableShowStmt
 		| VariableResetStmt
@@ -938,57 +938,68 @@ CheckPointStmt: CHECKPOINT
  *****************************************************************************/
 
 AlterTableStmt:
-/* ALTER TABLE <name> ADD [COLUMN] <coldef> */
-		ALTER TABLE relation_name opt_inh_star ADD opt_column columnDef
+/* ALTER TABLE <relation> ADD [COLUMN] <coldef> */
+		ALTER TABLE relation_expr ADD opt_column columnDef
 				{
 					AlterTableStmt *n = makeNode(AlterTableStmt);
 					n->subtype = 'A';
-					n->relname = $3;
-					n->inhOpt = $4;
-					n->def = $7;
+					n->relname = $3->relname;
+					n->inhOpt = $3->inhOpt;
+					n->def = $6;
 					$$ = (Node *)n;
 				}
-/* ALTER TABLE <name> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP DEFAULT} */
-		| ALTER TABLE relation_name opt_inh_star ALTER opt_column ColId alter_column_action
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP DEFAULT} */
+		| ALTER TABLE relation_expr ALTER opt_column ColId alter_column_default
 				{
 					AlterTableStmt *n = makeNode(AlterTableStmt);
 					n->subtype = 'T';
-					n->relname = $3;
-					n->inhOpt = $4;
-					n->name = $7;
-					n->def = $8;
+					n->relname = $3->relname;
+					n->inhOpt = $3->inhOpt;
+					n->name = $6;
+					n->def = $7;
 					$$ = (Node *)n;
 				}
-/* ALTER TABLE <name> DROP [COLUMN] <name> {RESTRICT|CASCADE} */
-		| ALTER TABLE relation_name opt_inh_star DROP opt_column ColId drop_behavior
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> SET STATISTICS <Iconst> */
+		| ALTER TABLE relation_expr ALTER opt_column ColId SET STATISTICS Iconst
+				{
+					AlterTableStmt *n = makeNode(AlterTableStmt);
+					n->subtype = 'S';
+					n->relname = $3->relname;
+					n->inhOpt = $3->inhOpt;
+					n->name = $6;
+					n->def = (Node *) makeInteger($9);
+					$$ = (Node *)n;
+				}
+/* ALTER TABLE <relation> DROP [COLUMN] <colname> {RESTRICT|CASCADE} */
+		| ALTER TABLE relation_expr DROP opt_column ColId drop_behavior
 				{
 					AlterTableStmt *n = makeNode(AlterTableStmt);
 					n->subtype = 'D';
-					n->relname = $3;
-					n->inhOpt = $4;
-					n->name = $7;
-					n->behavior = $8;
+					n->relname = $3->relname;
+					n->inhOpt = $3->inhOpt;
+					n->name = $6;
+					n->behavior = $7;
 					$$ = (Node *)n;
 				}
-/* ALTER TABLE <name> ADD CONSTRAINT ... */
-		| ALTER TABLE relation_name opt_inh_star ADD TableConstraint
+/* ALTER TABLE <relation> ADD CONSTRAINT ... */
+		| ALTER TABLE relation_expr ADD TableConstraint
 				{
 					AlterTableStmt *n = makeNode(AlterTableStmt);
 					n->subtype = 'C';
-					n->relname = $3;
-					n->inhOpt = $4;
-					n->def = $6;
+					n->relname = $3->relname;
+					n->inhOpt = $3->inhOpt;
+					n->def = $5;
 					$$ = (Node *)n;
 				}
-/* ALTER TABLE <name> DROP CONSTRAINT <name> {RESTRICT|CASCADE} */
-		| ALTER TABLE relation_name opt_inh_star DROP CONSTRAINT name drop_behavior
+/* ALTER TABLE <relation> DROP CONSTRAINT <name> {RESTRICT|CASCADE} */
+		| ALTER TABLE relation_expr DROP CONSTRAINT name drop_behavior
 				{
 					AlterTableStmt *n = makeNode(AlterTableStmt);
 					n->subtype = 'X';
-					n->relname = $3;
-					n->inhOpt = $4;
-					n->name = $7;
-					n->behavior = $8;
+					n->relname = $3->relname;
+					n->inhOpt = $3->inhOpt;
+					n->name = $6;
+					n->behavior = $7;
 					$$ = (Node *)n;
 				}
 /* ALTER TABLE <name> CREATE TOAST TABLE */
@@ -997,6 +1008,7 @@ AlterTableStmt:
 					AlterTableStmt *n = makeNode(AlterTableStmt);
 					n->subtype = 'E';
 					n->relname = $3;
+					n->inhOpt = INH_NO;
 					$$ = (Node *)n;
 				}
 /* ALTER TABLE <name> OWNER TO UserId */
@@ -1005,12 +1017,13 @@ AlterTableStmt:
 					AlterTableStmt *n = makeNode(AlterTableStmt);
 					n->subtype = 'U';
 					n->relname = $3;
+					n->inhOpt = INH_NO;
 					n->name = $6;
 					$$ = (Node *)n;
 				}
 		;
 
-alter_column_action:
+alter_column_default:
 		SET DEFAULT a_expr
 			{
 				/* Treat SET DEFAULT NULL the same as DROP DEFAULT */
@@ -1478,10 +1491,6 @@ key_reference:  NO ACTION				{ $$ = FKCONSTR_ON_KEY_NOACTION; }
 		| SET DEFAULT					{ $$ = FKCONSTR_ON_KEY_SETDEFAULT; }
 		;
 
-opt_only: ONLY              	     	        { $$ = INH_NO; }
-        | /*EMPTY*/								{ $$ = INH_DEFAULT; } 
-		;
-
 OptInherit:  INHERITS '(' relation_name_list ')'	{ $$ = $3; }
 		| /*EMPTY*/									{ $$ = NIL; }
 		;
@@ -2598,14 +2607,13 @@ opt_force:	FORCE									{  $$ = TRUE; }
  *
  *****************************************************************************/
 
-RenameStmt:  ALTER TABLE relation_name opt_inh_star
-				  RENAME opt_column opt_name TO name
+RenameStmt:  ALTER TABLE relation_expr RENAME opt_column opt_name TO name
 				{
 					RenameStmt *n = makeNode(RenameStmt);
-					n->relname = $3;
-					n->inhOpt = $4;
-					n->column = $7;
-					n->newname = $9;
+					n->relname = $3->relname;
+					n->inhOpt = $3->inhOpt;
+					n->column = $6;
+					n->newname = $8;
 					$$ = (Node *)n;
 				}
 		;
@@ -2994,49 +3002,71 @@ ClusterStmt:  CLUSTER index_name ON relation_name
  *
  *		QUERY:
  *				vacuum
+ *				analyze
  *
  *****************************************************************************/
 
-VacuumStmt:  VACUUM opt_verbose opt_analyze
+VacuumStmt:  VACUUM opt_verbose
 				{
 					VacuumStmt *n = makeNode(VacuumStmt);
+					n->vacuum = true;
+					n->analyze = false;
 					n->verbose = $2;
-					n->analyze = $3;
 					n->vacrel = NULL;
-					n->va_spec = NIL;
+					n->va_cols = NIL;
 					$$ = (Node *)n;
 				}
-		| VACUUM opt_verbose opt_analyze relation_name opt_va_list
+		| VACUUM opt_verbose relation_name
 				{
 					VacuumStmt *n = makeNode(VacuumStmt);
+					n->vacuum = true;
+					n->analyze = false;
 					n->verbose = $2;
-					n->analyze = $3;
-					n->vacrel = $4;
-					n->va_spec = $5;
-					if ( $5 != NIL && !$4 )
-						elog(ERROR,"VACUUM syntax error at or near \"(\""
-							"\n\tRelation name must be specified");
+					n->vacrel = $3;
+					n->va_cols = NIL;
+					$$ = (Node *)n;
+				}
+		| VACUUM opt_verbose AnalyzeStmt
+				{
+					VacuumStmt *n = (VacuumStmt *) $3;
+					n->vacuum = true;
+					n->verbose |= $2;
 					$$ = (Node *)n;
 				}
 		;
 
-opt_verbose:  VERBOSE							{ $$ = TRUE; }
-		| /*EMPTY*/								{ $$ = FALSE; }
+AnalyzeStmt:  analyze_keyword opt_verbose
+				{
+					VacuumStmt *n = makeNode(VacuumStmt);
+					n->vacuum = false;
+					n->analyze = true;
+					n->verbose = $2;
+					n->vacrel = NULL;
+					n->va_cols = NIL;
+					$$ = (Node *)n;
+				}
+		| analyze_keyword opt_verbose relation_name opt_name_list
+				{
+					VacuumStmt *n = makeNode(VacuumStmt);
+					n->vacuum = false;
+					n->analyze = true;
+					n->verbose = $2;
+					n->vacrel = $3;
+					n->va_cols = $4;
+					$$ = (Node *)n;
+				}
 		;
 
-opt_analyze:  ANALYZE							{ $$ = TRUE; }
+analyze_keyword:  ANALYZE						{ $$ = TRUE; }
 		|	  ANALYSE /* British */				{ $$ = TRUE; }
-		| /*EMPTY*/								{ $$ = FALSE; }
 		;
 
-opt_va_list:  '(' va_list ')'					{ $$ = $2; }
-		| /*EMPTY*/								{ $$ = NIL; }
+opt_verbose:  VERBOSE							{ $$ = TRUE; }
+		| /*EMPTY*/								{ $$ = FALSE; }
 		;
 
-va_list:  name
-				{ $$ = makeList1($1); }
-		| va_list ',' name
-				{ $$ = lappend($1, $3); }
+opt_name_list:  '(' name_list ')'				{ $$ = $2; }
+		| /*EMPTY*/								{ $$ = NIL; }
 		;
 
 
@@ -3160,12 +3190,12 @@ columnElem:  ColId opt_indirection
  *
  *****************************************************************************/
 
-DeleteStmt:  DELETE FROM opt_only relation_name where_clause
+DeleteStmt:  DELETE FROM relation_expr where_clause
 				{
 					DeleteStmt *n = makeNode(DeleteStmt);
-					n->inhOpt = $3;
-					n->relname = $4;
-					n->whereClause = $5;
+					n->relname = $3->relname;
+					n->inhOpt = $3->inhOpt;
+					n->whereClause = $4;
 					$$ = (Node *)n;
 				}
 		;
@@ -3202,17 +3232,17 @@ opt_lmode:	SHARE				{ $$ = TRUE; }
  *
  *****************************************************************************/
 
-UpdateStmt:  UPDATE opt_only relation_name
+UpdateStmt:  UPDATE relation_expr
 			  SET update_target_list
 			  from_clause
 			  where_clause
 				{
 					UpdateStmt *n = makeNode(UpdateStmt);
-					n->inhOpt = $2;
-					n->relname = $3;
-					n->targetList = $5;
-					n->fromClause = $6;
-					n->whereClause = $7;
+					n->relname = $2->relname;
+					n->inhOpt = $2->inhOpt;
+					n->targetList = $4;
+					n->fromClause = $5;
+					n->whereClause = $6;
 					$$ = (Node *)n;
 				}
 		;
@@ -3545,10 +3575,6 @@ select_offset_value:	Iconst
  *	...however, recursive addattr and rename supported.  make special
  *	cases for these.
  */
-opt_inh_star:  '*'								{ $$ = INH_YES; }
-		| /*EMPTY*/								{ $$ = INH_DEFAULT; }
-		;
-
 relation_name_list:  name_list;
 
 name_list:  name
@@ -3576,7 +3602,7 @@ opt_for_update_clause:	for_update_clause		{ $$ = $1; }
 		| /* EMPTY */							{ $$ = NULL; }
 		;
 
-update_list:  OF va_list						{ $$ = $2; }
+update_list:  OF name_list						{ $$ = $2; }
 		| /* EMPTY */							{ $$ = makeList1(NULL); }
 		;
 
@@ -5525,6 +5551,7 @@ TokenId:  ABSOLUTE						{ $$ = "absolute"; }
 		| SHARE							{ $$ = "share"; }
 		| START							{ $$ = "start"; }
 		| STATEMENT						{ $$ = "statement"; }
+		| STATISTICS					{ $$ = "statistics"; }
 		| STDIN							{ $$ = "stdin"; }
 		| STDOUT						{ $$ = "stdout"; }
 		| SYSID							{ $$ = "sysid"; }
diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c
index 402dbfd28ca..8ab19f86ae8 100644
--- a/src/backend/parser/keywords.c
+++ b/src/backend/parser/keywords.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.90 2001/03/22 03:59:40 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.91 2001/05/07 00:43:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -238,6 +238,7 @@ static ScanKeyword ScanKeywords[] = {
 	{"some", SOME},
 	{"start", START},
 	{"statement", STATEMENT},
+	{"statistics", STATISTICS},
 	{"stdin", STDIN},
 	{"stdout", STDOUT},
 	{"substring", SUBSTRING},
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index f5324cb3735..e1d49842fd2 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/parser/parse_relation.c,v 1.54 2001/04/18 17:04:24 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/parser/parse_relation.c,v 1.55 2001/05/07 00:43:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -75,7 +75,7 @@ static struct
 	}
 };
 
-#define SPECIALS ((int) (sizeof(special_attr)/sizeof(special_attr[0])))
+#define SPECIALS ((int) lengthof(special_attr))
 
 
 /*
@@ -670,7 +670,7 @@ isForUpdate(ParseState *pstate, char *relname)
 
 				foreach(l, pstate->p_forUpdate)
 				{
-					char	   *rname = lfirst(l);
+					char	   *rname = strVal(lfirst(l));
 
 					if (strcmp(relname, rname) == 0)
 						return true;
@@ -1020,20 +1020,6 @@ attnameIsSet(Relation rd, char *name)
 
 #endif
 
-#ifdef NOT_USED
-/*
- *	This should only be used if the relation is already
- *	heap_open()'ed.  Use the cache version
- *	for access to non-opened relations.
- */
-int
-attnumAttNelems(Relation rd, int attid)
-{
-	return rd->rd_att->attrs[attid - 1]->attnelems;
-}
-
-#endif
-
 /* given attribute id, return type of that attribute */
 /*
  *	This should only be used if the relation is already
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index ae6cd20a5db..b616f7e68ef 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/tcop/utility.c,v 1.109 2001/03/22 06:16:17 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/tcop/utility.c,v 1.110 2001/05/07 00:43:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -427,13 +427,19 @@ ProcessUtility(Node *parsetree,
 										interpretInhOption(stmt->inhOpt),
 											(ColumnDef *) stmt->def);
 						break;
-					case 'T':	/* ALTER COLUMN */
-						AlterTableAlterColumn(stmt->relname,
+					case 'T':	/* ALTER COLUMN DEFAULT */
+						AlterTableAlterColumnDefault(stmt->relname,
 										interpretInhOption(stmt->inhOpt),
-											  stmt->name,
-											  stmt->def);
+													 stmt->name,
+													 stmt->def);
 						break;
-					case 'D':	/* ALTER DROP */
+					case 'S':	/* ALTER COLUMN STATISTICS */
+						AlterTableAlterColumnStatistics(stmt->relname,
+										interpretInhOption(stmt->inhOpt),
+														stmt->name,
+														stmt->def);
+						break;
+					case 'D':	/* DROP COLUMN */
 						AlterTableDropColumn(stmt->relname,
 										interpretInhOption(stmt->inhOpt),
 											 stmt->name,
@@ -703,12 +709,13 @@ ProcessUtility(Node *parsetree,
 			break;
 
 		case T_VacuumStmt:
-			set_ps_display(commandTag = "VACUUM");
+			if (((VacuumStmt *) parsetree)->vacuum)
+				commandTag = "VACUUM";
+			else
+				commandTag = "ANALYZE";
+			set_ps_display(commandTag);
 
-			vacuum(((VacuumStmt *) parsetree)->vacrel,
-				   ((VacuumStmt *) parsetree)->verbose,
-				   ((VacuumStmt *) parsetree)->analyze,
-				   ((VacuumStmt *) parsetree)->va_spec);
+			vacuum((VacuumStmt *) parsetree);
 			break;
 
 		case T_ExplainStmt:
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 1fe0afb0a35..41ba82db7b5 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.87 2001/03/23 04:49:54 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.88 2001/05/07 00:43:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -57,9 +57,6 @@
 /* default selectivity estimate for pattern-match operators such as LIKE */
 #define DEFAULT_MATCH_SEL	0.01
 
-/* "fudge factor" for estimating frequency of not-most-common values */
-#define NOT_MOST_COMMON_RATIO  0.1
-
 static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
 				  Datum lobound, Datum hibound, Oid boundstypid,
 				  double *scaledlobound, double *scaledhibound);
@@ -75,17 +72,9 @@ static double convert_one_string_to_scalar(unsigned char *value,
 static unsigned char *convert_string_datum(Datum value, Oid typid);
 static double convert_timevalue_to_scalar(Datum value, Oid typid);
 static void getattproperties(Oid relid, AttrNumber attnum,
-				 Oid *typid,
-				 int *typlen,
-				 bool *typbyval,
-				 int32 *typmod);
-static bool getattstatistics(Oid relid, AttrNumber attnum,
-				 Oid typid, int32 typmod,
-				 double *nullfrac,
-				 double *commonfrac,
-				 Datum *commonval,
-				 Datum *loval,
-				 Datum *hival);
+							 Oid *typid, int32 *typmod);
+static double get_att_numdistinct(Oid relid, AttrNumber attnum, Oid typid,
+								  Form_pg_statistic stats);
 static Selectivity prefix_selectivity(char *prefix,
 				   Oid relid,
 				   AttrNumber attno,
@@ -115,134 +104,173 @@ eqsel(PG_FUNCTION_ARGS)
 	AttrNumber	attno = PG_GETARG_INT16(2);
 	Datum		value = PG_GETARG_DATUM(3);
 	int32		flag = PG_GETARG_INT32(4);
-	float8		result;
-
-	if (NONVALUE(attno) || NONVALUE(relid))
-		result = DEFAULT_EQ_SEL;
-	else
+	Oid			typid;
+	int32		typmod;
+	HeapTuple	statsTuple;
+	Datum	   *values;
+	int			nvalues;
+	float4	   *numbers;
+	int			nnumbers;
+	double		selec;
+
+	if (NONVALUE(relid) || NONVALUE(attno))
+		PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
+
+	/* get info about the attribute */
+	getattproperties(relid, attno, &typid, &typmod);
+
+	/* get stats for the attribute, if available */
+	statsTuple = SearchSysCache(STATRELATT,
+								ObjectIdGetDatum(relid),
+								Int16GetDatum(attno),
+								0, 0);
+	if (HeapTupleIsValid(statsTuple))
 	{
-		Oid			typid;
-		int			typlen;
-		bool		typbyval;
-		int32		typmod;
-		double		nullfrac;
-		double		commonfrac;
-		Datum		commonval;
-		double		selec;
-
-		/* get info about the attribute */
-		getattproperties(relid, attno,
-						 &typid, &typlen, &typbyval, &typmod);
-
-		/* get stats for the attribute, if available */
-		if (getattstatistics(relid, attno, typid, typmod,
-							 &nullfrac, &commonfrac, &commonval,
-							 NULL, NULL))
-		{
-			if (flag & SEL_CONSTANT)
-			{
+		Form_pg_statistic stats;
 
-				/*
-				 * Is the constant "=" to the column's most common value?
-				 * (Although the operator may not really be "=", we will
-				 * assume that seeing whether it returns TRUE for the most
-				 * common value is useful information. If you don't like
-				 * it, maybe you shouldn't be using eqsel for your
-				 * operator...)
-				 */
-				RegProcedure eqproc = get_opcode(opid);
-				bool		mostcommon;
+		stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
 
-				if (eqproc == (RegProcedure) NULL)
-					elog(ERROR, "eqsel: no procedure for operator %u",
-						 opid);
+		if (flag & SEL_CONSTANT)
+		{
+			bool	match = false;
+			int		i;
 
-				/* be careful to apply operator right way 'round */
-				if (flag & SEL_RIGHT)
-					mostcommon = DatumGetBool(OidFunctionCall2(eqproc,
-															   commonval,
-															   value));
-				else
-					mostcommon = DatumGetBool(OidFunctionCall2(eqproc,
-															   value,
-															 commonval));
+			/*
+			 * Is the constant "=" to any of the column's most common
+			 * values?  (Although the given operator may not really be
+			 * "=", we will assume that seeing whether it returns TRUE
+			 * is an appropriate test.  If you don't like this, maybe you
+			 * shouldn't be using eqsel for your operator...)
+			 */
+			if (get_attstatsslot(statsTuple, typid, typmod,
+								 STATISTIC_KIND_MCV, InvalidOid,
+								 &values, &nvalues,
+								 &numbers, &nnumbers))
+			{
+				FmgrInfo	eqproc;
 
-				if (mostcommon)
-				{
+				fmgr_info(get_opcode(opid), &eqproc);
 
-					/*
-					 * Constant is "=" to the most common value.  We know
-					 * selectivity exactly (or as exactly as VACUUM could
-					 * calculate it, anyway).
-					 */
-					selec = commonfrac;
-				}
-				else
+				for (i = 0; i < nvalues; i++)
 				{
-
-					/*
-					 * Comparison is against a constant that is neither
-					 * the most common value nor null.	Its selectivity
-					 * cannot be more than this:
-					 */
-					selec = 1.0 - commonfrac - nullfrac;
-					if (selec > commonfrac)
-						selec = commonfrac;
-
-					/*
-					 * and in fact it's probably less, so we should apply
-					 * a fudge factor.	The only case where we don't is
-					 * for a boolean column, where indeed we have
-					 * estimated the less-common value's frequency
-					 * exactly!
-					 */
-					if (typid != BOOLOID)
-						selec *= NOT_MOST_COMMON_RATIO;
+					/* be careful to apply operator right way 'round */
+					if (flag & SEL_RIGHT)
+						match = DatumGetBool(FunctionCall2(&eqproc,
+														   values[i],
+														   value));
+					else
+						match = DatumGetBool(FunctionCall2(&eqproc,
+														   value,
+														   values[i]));
+					if (match)
+						break;
 				}
 			}
 			else
 			{
+				/* no most-common-value info available */
+				values = NULL;
+				numbers = NULL;
+				i = nvalues = nnumbers = 0;
+			}
 
+			if (match)
+			{
+				/*
+				 * Constant is "=" to this common value.  We know
+				 * selectivity exactly (or as exactly as VACUUM
+				 * could calculate it, anyway).
+				 */
+				selec = numbers[i];
+			}
+			else
+			{
 				/*
-				 * Search is for a value that we do not know a priori, but
-				 * we will assume it is not NULL.  Selectivity cannot be
-				 * more than this:
+				 * Comparison is against a constant that is neither
+				 * NULL nor any of the common values.  Its selectivity
+				 * cannot be more than this:
 				 */
-				selec = 1.0 - nullfrac;
-				if (selec > commonfrac)
-					selec = commonfrac;
+				double	sumcommon = 0.0;
+				double	otherdistinct;
 
+				for (i = 0; i < nnumbers; i++)
+					sumcommon += numbers[i];
+				selec = 1.0 - sumcommon - stats->stanullfrac;
+				/*
+				 * and in fact it's probably a good deal less.
+				 * We approximate that all the not-common values
+				 * share this remaining fraction equally, so we
+				 * divide by the number of other distinct values.
+				 */
+				otherdistinct = get_att_numdistinct(relid, attno,
+													typid, stats)
+					- nnumbers;
+				if (otherdistinct > 1)
+					selec /= otherdistinct;
 				/*
-				 * and in fact it's probably less, so apply a fudge
-				 * factor.
+				 * Another cross-check: selectivity shouldn't be
+				 * estimated as more than the least common
+				 * "most common value".
 				 */
-				selec *= NOT_MOST_COMMON_RATIO;
+				if (nnumbers > 0 && selec > numbers[nnumbers-1])
+					selec = numbers[nnumbers-1];
 			}
 
-			/* result should be in range, but make sure... */
-			if (selec < 0.0)
-				selec = 0.0;
-			else if (selec > 1.0)
-				selec = 1.0;
-
-			if (!typbyval)
-				pfree(DatumGetPointer(commonval));
+			free_attstatsslot(typid, values, nvalues, numbers, nnumbers);
 		}
 		else
 		{
+			double		ndistinct;
 
 			/*
-			 * No VACUUM ANALYZE stats available, so make a guess using
-			 * the dispersion stat (if we have that, which is unlikely for
-			 * a normal attribute; but for a system attribute we may be
-			 * able to estimate it).
+			 * Search is for a value that we do not know a priori, but
+			 * we will assume it is not NULL.  Estimate the selectivity
+			 * as non-null fraction divided by number of distinct values,
+			 * so that we get a result averaged over all possible values
+			 * whether common or uncommon.  (Essentially, we are assuming
+			 * that the not-yet-known comparison value is equally likely
+			 * to be any of the possible values, regardless of their
+			 * frequency in the table.  Is that a good idea?)
+			 */
+			selec = 1.0 - stats->stanullfrac;
+			ndistinct = get_att_numdistinct(relid, attno, typid, stats);
+			if (ndistinct > 1)
+				selec /= ndistinct;
+			/*
+			 * Cross-check: selectivity should never be
+			 * estimated as more than the most common value's.
 			 */
-			selec = get_attdispersion(relid, attno, 0.01);
+			if (get_attstatsslot(statsTuple, typid, typmod,
+								 STATISTIC_KIND_MCV, InvalidOid,
+								 NULL, NULL,
+								 &numbers, &nnumbers))
+			{
+				if (nnumbers > 0 && selec > numbers[0])
+					selec = numbers[0];
+				free_attstatsslot(typid, NULL, 0, numbers, nnumbers);
+			}
 		}
 
-		result = (float8) selec;
+		ReleaseSysCache(statsTuple);
 	}
-	PG_RETURN_FLOAT8(result);
+	else
+	{
+		/*
+		 * No VACUUM ANALYZE stats available, so make a guess using
+		 * estimated number of distinct values and assuming they are
+		 * equally common.  (The guess is unlikely to be very good,
+		 * but we do know a few special cases.)
+		 */
+		selec = 1.0 / get_att_numdistinct(relid, attno, typid, NULL);
+	}
+
+	/* result should be in range, but make sure... */
+	if (selec < 0.0)
+		selec = 0.0;
+	else if (selec > 1.0)
+		selec = 1.0;
+
+	PG_RETURN_FLOAT8((float8) selec);
 }
 
 /*
@@ -301,117 +329,263 @@ scalarltsel(PG_FUNCTION_ARGS)
 	AttrNumber	attno = PG_GETARG_INT16(2);
 	Datum		value = PG_GETARG_DATUM(3);
 	int32		flag = PG_GETARG_INT32(4);
-	float8		result;
+	bool		isgt;
+	HeapTuple	oprTuple;
+	HeapTuple	statsTuple;
+	Form_pg_statistic stats;
+	Oid			contype;
+	FmgrInfo	opproc;
+	Oid			typid;
+	int32		typmod;
+	Datum	   *values;
+	int			nvalues;
+	float4	   *numbers;
+	int			nnumbers;
+	double		mcv_selec,
+				hist_selec,
+				sumcommon;
+	double		selec;
+	int			i;
+
+	if (NONVALUE(relid) || NONVALUE(attno))
+		PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
+
+	/* Can't do anything useful if no constant to compare against, either */
+	if (!(flag & SEL_CONSTANT))
+		PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
 
-	if (!(flag & SEL_CONSTANT) || NONVALUE(attno) || NONVALUE(relid))
-		result = DEFAULT_INEQ_SEL;
+	/*
+	 * Force the constant to be on the right to simplify later logic.
+	 * This means that we may be dealing with either "<" or ">" cases.
+	 */
+	if (flag & SEL_RIGHT)
+	{
+		/* we have x < const */
+		isgt = false;
+	}
 	else
 	{
-		HeapTuple	oprtuple;
-		Oid			ltype,
-					rtype,
-					contype;
-		Oid			typid;
-		int			typlen;
-		bool		typbyval;
-		int32		typmod;
-		Datum		hival,
-					loval;
-		double		val,
-					high,
-					low,
-					numerator,
-					denominator;
-
-		/*
-		 * Get left and right datatypes of the operator so we know what
-		 * type the constant is.
-		 */
-		oprtuple = SearchSysCache(OPEROID,
-								  ObjectIdGetDatum(opid),
-								  0, 0, 0);
-		if (!HeapTupleIsValid(oprtuple))
-			elog(ERROR, "scalarltsel: no tuple for operator %u", opid);
-		ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
-		rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
-		contype = (flag & SEL_RIGHT) ? rtype : ltype;
-		ReleaseSysCache(oprtuple);
-
-		/* Now get info and stats about the attribute */
-		getattproperties(relid, attno,
-						 &typid, &typlen, &typbyval, &typmod);
-
-		if (!getattstatistics(relid, attno, typid, typmod,
-							  NULL, NULL, NULL,
-							  &loval, &hival))
+		/* we have const < x, commute to make x > const */
+		opid = get_commutator(opid);
+		if (!opid)
 		{
-			/* no stats available, so default result */
+			/* Use default selectivity (should we raise an error instead?) */
 			PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
 		}
+		isgt = true;
+	}
 
-		/* Convert the values to a uniform comparison scale. */
-		if (!convert_to_scalar(value, contype, &val,
-							   loval, hival, typid,
-							   &low, &high))
-		{
+	/*
+	 * The constant might not be the same datatype as the column;
+	 * look at the operator's input types to find out what it is.
+	 * Also set up to be able to call the operator's execution proc.
+	 */
+	oprTuple = SearchSysCache(OPEROID,
+							  ObjectIdGetDatum(opid),
+							  0, 0, 0);
+	if (!HeapTupleIsValid(oprTuple))
+		elog(ERROR, "scalarltsel: no tuple for operator %u", opid);
+	contype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprright;
+	fmgr_info(((Form_pg_operator) GETSTRUCT(oprTuple))->oprcode, &opproc);
+	ReleaseSysCache(oprTuple);
+
+	/* Now get info and stats about the attribute */
+	getattproperties(relid, attno, &typid, &typmod);
+
+	statsTuple = SearchSysCache(STATRELATT,
+								ObjectIdGetDatum(relid),
+								Int16GetDatum(attno),
+								0, 0);
+	if (!HeapTupleIsValid(statsTuple))
+	{
+		/* no stats available, so default result */
+		PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
+	}
+	stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
 
-			/*
-			 * Ideally we'd produce an error here, on the grounds that the
-			 * given operator shouldn't have scalarltsel registered as its
-			 * selectivity func unless we can deal with its operand types.
-			 * But currently, all manner of stuff is invoking scalarltsel,
-			 * so give a default estimate until that can be fixed.
-			 */
-			if (!typbyval)
-			{
-				pfree(DatumGetPointer(hival));
-				pfree(DatumGetPointer(loval));
-			}
-			PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
-		}
+	/*
+	 * If we have most-common-values info, add up the fractions of the
+	 * MCV entries that satisfy MCV OP CONST.  These fractions contribute
+	 * directly to the result selectivity.  Also add up the total fraction
+	 * represented by MCV entries.
+	 */
+	mcv_selec = 0.0;
+	sumcommon = 0.0;
 
-		/* release temp storage if needed */
-		if (!typbyval)
+	if (get_attstatsslot(statsTuple, typid, typmod,
+						 STATISTIC_KIND_MCV, InvalidOid,
+						 &values, &nvalues,
+						 &numbers, &nnumbers))
+	{
+		for (i = 0; i < nvalues; i++)
 		{
-			pfree(DatumGetPointer(hival));
-			pfree(DatumGetPointer(loval));
+			if (DatumGetBool(FunctionCall2(&opproc,
+										   values[i],
+										   value)))
+				mcv_selec += numbers[i];
+			sumcommon += numbers[i];
 		}
+		free_attstatsslot(typid, values, nvalues, numbers, nnumbers);
+	}
+
+	/*
+	 * If there is a histogram, determine which bin the constant falls in,
+	 * and compute the resulting contribution to selectivity.
+	 *
+	 * Someday, VACUUM might store more than one histogram per rel/att,
+	 * corresponding to more than one possible sort ordering defined for
+	 * the column type.  However, to make that work we will need to figure
+	 * out which staop to search for --- it's not necessarily the one we
+	 * have at hand!  (For example, we might have a '<=' operator rather
+	 * than the '<' operator that will appear in staop.)  For now, assume
+	 * that whatever appears in pg_statistic is sorted the same way our
+	 * operator sorts.
+	 */
+	hist_selec = 0.0;
 
-		if (high <= low)
+	if (get_attstatsslot(statsTuple, typid, typmod,
+						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
+						 &values, &nvalues,
+						 NULL, NULL))
+	{
+		if (nvalues > 1)
 		{
+			double	histfrac;
+			bool	ltcmp;
+
+			ltcmp = DatumGetBool(FunctionCall2(&opproc,
+											   values[0],
+											   value));
+			if (isgt)
+				ltcmp = !ltcmp;
+			if (!ltcmp)
+			{
+				/* Constant is below lower histogram boundary. */
+				histfrac = 0.0;
+			}
+			else
+			{
+				/*
+				 * Scan to find proper location.  This could be made faster
+				 * by using a binary-search method, but it's probably not
+				 * worth the trouble for typical histogram sizes.
+				 */
+				for (i = 1; i < nvalues; i++)
+				{
+					ltcmp = DatumGetBool(FunctionCall2(&opproc,
+													   values[i],
+													   value));
+					if (isgt)
+						ltcmp = !ltcmp;
+					if (!ltcmp)
+						break;
+				}
+				if (i >= nvalues)
+				{
+					/* Constant is above upper histogram boundary. */
+					histfrac = 1.0;
+				}
+				else
+				{
+					double		val,
+								high,
+								low;
+					double		binfrac;
 
+					/*
+					 * We have values[i-1] < constant < values[i].
+					 *
+					 * Convert the constant and the two nearest bin boundary
+					 * values to a uniform comparison scale, and do a linear
+					 * interpolation within this bin.
+					 */
+					if (convert_to_scalar(value, contype, &val,
+										  values[i-1], values[i], typid,
+										  &low, &high))
+					{
+						if (high <= low)
+						{
+							/* cope if bin boundaries appear identical */
+							binfrac = 0.5;
+						}
+						else if (val <= low)
+							binfrac = 0.0;
+						else if (val >= high)
+							binfrac = 1.0;
+						else
+							binfrac = (val - low) / (high - low);
+					}
+					else
+					{
+						/*
+						 * Ideally we'd produce an error here, on the grounds
+						 * that the given operator shouldn't have scalarltsel
+						 * registered as its selectivity func unless we can
+						 * deal with its operand types.  But currently, all
+						 * manner of stuff is invoking scalarltsel, so give a
+						 * default estimate until that can be fixed.
+						 */
+						binfrac = 0.5;
+					}
+					/*
+					 * Now, compute the overall selectivity across the values
+					 * represented by the histogram.  We have i-1 full bins
+					 * and binfrac partial bin below the constant.
+					 */
+					histfrac = (double) (i-1) + binfrac;
+					histfrac /= (double) (nvalues - 1);
+				}
+			}
 			/*
-			 * If we trusted the stats fully, we could return a small or
-			 * large selec depending on which side of the single data
-			 * point the constant is on.  But it seems better to assume
-			 * that the stats are wrong and return a default...
+			 * Now histfrac = fraction of histogram entries below the constant.
+			 *
+			 * Account for "<" vs ">"
 			 */
-			result = DEFAULT_INEQ_SEL;
-		}
-		else if (val < low || val > high)
-		{
-
+			hist_selec = isgt ? (1.0 - histfrac) : histfrac;
 			/*
-			 * If given value is outside the statistical range, return a
-			 * small or large value; but not 0.0/1.0 since there is a
-			 * chance the stats are out of date.
+			 * The histogram boundaries are only approximate to begin
+			 * with, and may well be out of date anyway.  Therefore,
+			 * don't believe extremely small or large selectivity
+			 * estimates.
 			 */
-			if (flag & SEL_RIGHT)
-				result = (val < low) ? 0.001 : 0.999;
-			else
-				result = (val < low) ? 0.999 : 0.001;
-		}
-		else
-		{
-			denominator = high - low;
-			if (flag & SEL_RIGHT)
-				numerator = val - low;
-			else
-				numerator = high - val;
-			result = numerator / denominator;
+			if (hist_selec < 0.001)
+				hist_selec = 0.001;
+			else if (hist_selec > 0.999)
+				hist_selec = 0.999;
 		}
+
+		free_attstatsslot(typid, values, nvalues, NULL, 0);
 	}
-	PG_RETURN_FLOAT8(result);
+
+	/*
+	 * Now merge the results from the MCV and histogram calculations,
+	 * realizing that the histogram covers only the non-null values that
+	 * are not listed in MCV.
+	 */
+	selec = 1.0 - stats->stanullfrac - sumcommon;
+
+	if (hist_selec > 0.0)
+		selec *= hist_selec;
+	else
+	{
+		/*
+		 * If no histogram but there are values not accounted for by MCV,
+		 * arbitrarily assume half of them will match.
+		 */
+		selec *= 0.5;
+	}
+
+	selec += mcv_selec;
+
+	ReleaseSysCache(statsTuple);
+
+	/* result should be in range, but make sure... */
+	if (selec < 0.0)
+		selec = 0.0;
+	else if (selec > 1.0)
+		selec = 1.0;
+
+	PG_RETURN_FLOAT8((float8) selec);
 }
 
 /*
@@ -428,34 +602,25 @@ scalargtsel(PG_FUNCTION_ARGS)
 	Datum		value = PG_GETARG_DATUM(3);
 	int32		flag = PG_GETARG_INT32(4);
 	Oid			ltopid;
-	float8		result;
 
 	/*
-	 * Compute selectivity of "<", then invert --- but only if we were
-	 * able to produce a non-default estimate.	Note that we get the
-	 * negator which strictly speaking means we are looking at "<=" for
-	 * ">" or "<" for ">=".  We assume this won't matter.
+	 * Commute so that we have a "<" or "<=" operator, then apply
+	 * scalarltsel.
 	 */
-	ltopid = get_negator(opid);
-	if (ltopid)
-	{
-		result = DatumGetFloat8(DirectFunctionCall5(scalarltsel,
-												ObjectIdGetDatum(ltopid),
-												 ObjectIdGetDatum(relid),
-													Int16GetDatum(attno),
-													value,
-													Int32GetDatum(flag)));
-	}
-	else
+	ltopid = get_commutator(opid);
+	if (!ltopid)
 	{
 		/* Use default selectivity (should we raise an error instead?) */
-		result = DEFAULT_INEQ_SEL;
+		PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
 	}
 
-	if (result != DEFAULT_INEQ_SEL)
-		result = 1.0 - result;
-
-	PG_RETURN_FLOAT8(result);
+	flag ^= SEL_RIGHT;
+	return DirectFunctionCall5(scalarltsel,
+							   ObjectIdGetDatum(ltopid),
+							   ObjectIdGetDatum(relid),
+							   Int16GetDatum(attno),
+							   value,
+							   Int32GetDatum(flag));
 }
 
 /*
@@ -476,7 +641,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
 		result = DEFAULT_MATCH_SEL;
 	else
 	{
-		HeapTuple	oprtuple;
+		HeapTuple	oprTuple;
 		Oid			ltype,
 					rtype;
 		char	   *patt;
@@ -488,14 +653,14 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
 		 * Get left and right datatypes of the operator so we know what
 		 * type the attribute is.
 		 */
-		oprtuple = SearchSysCache(OPEROID,
+		oprTuple = SearchSysCache(OPEROID,
 								  ObjectIdGetDatum(opid),
 								  0, 0, 0);
-		if (!HeapTupleIsValid(oprtuple))
+		if (!HeapTupleIsValid(oprTuple))
 			elog(ERROR, "patternsel: no tuple for operator %u", opid);
-		ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
-		rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
-		ReleaseSysCache(oprtuple);
+		ltype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprleft;
+		rtype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprright;
+		ReleaseSysCache(oprTuple);
 
 		/* the right-hand const is type text for all supported operators */
 		Assert(rtype == TEXTOID);
@@ -659,42 +824,88 @@ eqjoinsel(PG_FUNCTION_ARGS)
 	AttrNumber	attno1 = PG_GETARG_INT16(2);
 	Oid			relid2 = PG_GETARG_OID(3);
 	AttrNumber	attno2 = PG_GETARG_INT16(4);
-	float8		result;
-	float8		num1,
-				num2,
-				min;
 	bool		unknown1 = NONVALUE(relid1) || NONVALUE(attno1);
 	bool		unknown2 = NONVALUE(relid2) || NONVALUE(attno2);
+	double		selec;
 
 	if (unknown1 && unknown2)
-		result = DEFAULT_EQ_SEL;
+		selec = DEFAULT_EQ_SEL;
 	else
 	{
-		num1 = unknown1 ? 1.0 : get_attdispersion(relid1, attno1, 0.01);
-		num2 = unknown2 ? 1.0 : get_attdispersion(relid2, attno2, 0.01);
+		Oid			typid1;
+		Oid			typid2;
+		int32		typmod1;
+		int32		typmod2;
+		HeapTuple	statsTuple1 = NULL;
+		HeapTuple	statsTuple2 = NULL;
+		Form_pg_statistic stats1 = NULL;
+		Form_pg_statistic stats2 = NULL;
+		double		nd1,
+					nd2;
+
+		if (unknown1)
+		{
+			nd1 = 100.0;
+		}
+		else
+		{
+			/* get info about the attribute */
+			getattproperties(relid1, attno1, &typid1, &typmod1);
+
+			/* get stats for the attribute, if available */
+			statsTuple1 = SearchSysCache(STATRELATT,
+										 ObjectIdGetDatum(relid1),
+										 Int16GetDatum(attno1),
+										 0, 0);
+			if (HeapTupleIsValid(statsTuple1))
+				stats1 = (Form_pg_statistic) GETSTRUCT(statsTuple1);
+
+			nd1 = get_att_numdistinct(relid1, attno1, typid1, stats1);
+		}
+
+		if (unknown2)
+		{
+			nd2 = 100.0;
+		}
+		else
+		{
+			/* get info about the attribute */
+			getattproperties(relid2, attno2, &typid2, &typmod2);
+
+			/* get stats for the attribute, if available */
+			statsTuple2 = SearchSysCache(STATRELATT,
+										 ObjectIdGetDatum(relid2),
+										 Int16GetDatum(attno2),
+										 0, 0);
+			if (HeapTupleIsValid(statsTuple2))
+				stats2 = (Form_pg_statistic) GETSTRUCT(statsTuple2);
+
+			nd2 = get_att_numdistinct(relid2, attno2, typid2, stats2);
+		}
 
 		/*
-		 * The join selectivity cannot be more than num2, since each tuple
-		 * in table 1 could match no more than num2 fraction of tuples in
-		 * table 2 (and that's only if the table-1 tuple matches the most
-		 * common value in table 2, so probably it's less).  By the same
-		 * reasoning it is not more than num1. The min is therefore an
-		 * upper bound.
+		 * Estimate the join selectivity as 1 / sqrt(nd1*nd2)
+		 * (can we produce any theory for this?)
 		 *
-		 * If we know the dispersion of only one side, use it; the reasoning
-		 * above still works.
+		 * XXX possibility to do better: if both attributes have MCV lists
+		 * then we could determine the exact join selectivity between the
+		 * MCV sets, and only have to assume the join behavior of the non-MCV
+		 * values.  This could be a big win when the MCVs cover a large part
+		 * of the population.
 		 *
-		 * XXX can we make a better estimate here?	Using the nullfrac
-		 * statistic might be helpful, for example.  Assuming the operator
-		 * is strict (does not succeed for null inputs) then the
-		 * selectivity couldn't be more than (1-nullfrac1)*(1-nullfrac2),
-		 * which might be usefully small if there are many nulls.  How
-		 * about applying the operator to the most common values?
+		 * XXX what about nulls?
 		 */
-		min = (num1 < num2) ? num1 : num2;
-		result = min;
+		selec = 1.0 / sqrt(nd1 * nd2);
+		if (selec > 1.0)
+			selec = 1.0;
+
+		if (HeapTupleIsValid(statsTuple1))
+			ReleaseSysCache(statsTuple1);
+		if (HeapTupleIsValid(statsTuple2))
+			ReleaseSysCache(statsTuple2);
+
 	}
-	PG_RETURN_FLOAT8(result);
+	PG_RETURN_FLOAT8((float8) selec);
 }
 
 /*
@@ -829,7 +1040,8 @@ icnlikejoinsel(PG_FUNCTION_ARGS)
  *	  Returns "true" if successful.
  *
  * All numeric datatypes are simply converted to their equivalent
- * "double" values.
+ * "double" values.  XXX what about NUMERIC values that are outside
+ * the range of "double"?
  *
  * String datatypes are converted by convert_string_to_scalar(),
  * which is explained below.  The reason why this routine deals with
@@ -917,7 +1129,7 @@ convert_numeric_to_scalar(Datum value, Oid typid)
 {
 	switch (typid)
 	{
-			case BOOLOID:
+		case BOOLOID:
 			return (double) DatumGetBool(value);
 		case INT2OID:
 			return (double) DatumGetInt16(value);
@@ -963,6 +1175,8 @@ convert_numeric_to_scalar(Datum value, Oid typid)
  * three strings before computing the scaled values.  This allows us to
  * "zoom in" when we encounter a narrow data range.  An example is a phone
  * number database where all the values begin with the same area code.
+ * (Actually, the bounds will be adjacent histogram-bin-boundary values,
+ * so this is more likely to happen than you might think.)
  */
 static void
 convert_string_to_scalar(unsigned char *value,
@@ -1208,11 +1422,11 @@ convert_timevalue_to_scalar(Datum value, Oid typid)
 /*
  * getattproperties
  *	  Retrieve pg_attribute properties for an attribute,
- *	  including type OID, type len, type byval flag, typmod.
+ *	  including type OID and typmod.
  */
 static void
 getattproperties(Oid relid, AttrNumber attnum,
-				 Oid *typid, int *typlen, bool *typbyval, int32 *typmod)
+				 Oid *typid, int32 *typmod)
 {
 	HeapTuple	atp;
 	Form_pg_attribute att_tup;
@@ -1227,164 +1441,87 @@ getattproperties(Oid relid, AttrNumber attnum,
 	att_tup = (Form_pg_attribute) GETSTRUCT(atp);
 
 	*typid = att_tup->atttypid;
-	*typlen = att_tup->attlen;
-	*typbyval = att_tup->attbyval;
 	*typmod = att_tup->atttypmod;
 
 	ReleaseSysCache(atp);
 }
 
 /*
- * getattstatistics
- *	  Retrieve the pg_statistic data for an attribute.
- *	  Returns 'false' if no stats are available.
+ * get_att_numdistinct
  *
- * Inputs:
- * 'relid' and 'attnum' are the relation and attribute number.
- * 'typid' and 'typmod' are the type and typmod of the column,
- * which the caller must already have looked up.
+ *	  Estimate the number of distinct values of an attribute.
  *
- * Outputs:
- * The available stats are nullfrac, commonfrac, commonval, loval, hival.
- * The caller need not retrieve all five --- pass NULL pointers for the
- * unwanted values.
+ * relid, attnum: identify the attribute to examine.
+ * typid: type of attribute.
+ * stats: pg_statistic tuple for attribute, or NULL if not available.
  *
- * commonval, loval, hival are returned as Datums holding the internal
- * representation of the values.  (Note that these should be pfree'd
- * after use if the data type is not by-value.)
+ * XXX possible future improvement: look to see if there is a unique
+ * index on the attribute.  If so, we can estimate ndistinct = ntuples.
+ * This should probably override any info from pg_statistic.
  */
-static bool
-getattstatistics(Oid relid,
-				 AttrNumber attnum,
-				 Oid typid,
-				 int32 typmod,
-				 double *nullfrac,
-				 double *commonfrac,
-				 Datum *commonval,
-				 Datum *loval,
-				 Datum *hival)
+static double
+get_att_numdistinct(Oid relid, AttrNumber attnum, Oid typid,
+					Form_pg_statistic stats)
 {
-	HeapTuple	tuple;
-	HeapTuple	typeTuple;
-	FmgrInfo	inputproc;
-	Oid			typelem;
-	bool		isnull;
+	HeapTuple	reltup;
+	double		ntuples;
 
 	/*
-	 * We assume that there will only be one entry in pg_statistic for the
-	 * given rel/att, so we search WITHOUT considering the staop column.
-	 * Someday, VACUUM might store more than one entry per rel/att,
-	 * corresponding to more than one possible sort ordering defined for
-	 * the column type.  However, to make that work we will need to figure
-	 * out which staop to search for --- it's not necessarily the one we
-	 * have at hand!  (For example, we might have a '>' operator rather
-	 * than the '<' operator that will appear in staop.)
+	 * Special-case boolean columns: presumably, two distinct values.
+	 *
+	 * Are there any other cases we should wire in special estimates for?
 	 */
-	tuple = SearchSysCache(STATRELID,
-						   ObjectIdGetDatum(relid),
-						   Int16GetDatum((int16) attnum),
-						   0, 0);
-	if (!HeapTupleIsValid(tuple))
-	{
-		/* no such stats entry */
-		return false;
-	}
+	if (typid == BOOLOID)
+		return 2.0;
 
-	if (nullfrac)
-		*nullfrac = ((Form_pg_statistic) GETSTRUCT(tuple))->stanullfrac;
-	if (commonfrac)
-		*commonfrac = ((Form_pg_statistic) GETSTRUCT(tuple))->stacommonfrac;
-
-	/* Get the type input proc for the column datatype */
-	typeTuple = SearchSysCache(TYPEOID,
-							   ObjectIdGetDatum(typid),
-							   0, 0, 0);
-	if (!HeapTupleIsValid(typeTuple))
-		elog(ERROR, "getattstatistics: Cache lookup failed for type %u",
-			 typid);
-	fmgr_info(((Form_pg_type) GETSTRUCT(typeTuple))->typinput, &inputproc);
-	typelem = ((Form_pg_type) GETSTRUCT(typeTuple))->typelem;
-	ReleaseSysCache(typeTuple);
+	/*
+	 * If VACUUM ANALYZE determined a fixed estimate, use it.
+	 */
+	if (stats && stats->stadistinct > 0.0)
+		return stats->stadistinct;
 
 	/*
-	 * Values are variable-length fields, so cannot access as struct
-	 * fields. Must do it the hard way with SysCacheGetAttr.
+	 * Otherwise we need to get the relation size.
 	 */
-	if (commonval)
-	{
-		Datum		val = SysCacheGetAttr(STATRELID, tuple,
-										  Anum_pg_statistic_stacommonval,
-										  &isnull);
+	reltup = SearchSysCache(RELOID,
+							ObjectIdGetDatum(relid),
+							0, 0, 0);
+	if (!HeapTupleIsValid(reltup))
+		elog(ERROR, "get_att_numdistinct: no relation tuple %u", relid);
 
-		if (isnull)
-		{
-			elog(DEBUG, "getattstatistics: stacommonval is null");
-			*commonval = PointerGetDatum(NULL);
-		}
-		else
-		{
-			char	   *strval = DatumGetCString(DirectFunctionCall1(textout,
-																   val));
-
-			*commonval = FunctionCall3(&inputproc,
-									   CStringGetDatum(strval),
-									   ObjectIdGetDatum(typelem),
-									   Int32GetDatum(typmod));
-			pfree(strval);
-		}
-	}
+	ntuples = ((Form_pg_class) GETSTRUCT(reltup))->reltuples;
 
-	if (loval)
-	{
-		Datum		val = SysCacheGetAttr(STATRELID, tuple,
-										  Anum_pg_statistic_staloval,
-										  &isnull);
+	ReleaseSysCache(reltup);
 
-		if (isnull)
-		{
-			elog(DEBUG, "getattstatistics: staloval is null");
-			*loval = PointerGetDatum(NULL);
-		}
-		else
-		{
-			char	   *strval = DatumGetCString(DirectFunctionCall1(textout,
-																   val));
-
-			*loval = FunctionCall3(&inputproc,
-								   CStringGetDatum(strval),
-								   ObjectIdGetDatum(typelem),
-								   Int32GetDatum(typmod));
-			pfree(strval);
-		}
-	}
+	if (ntuples <= 0.0)
+		return 100.0;			/* no data available; return a default */
 
-	if (hival)
-	{
-		Datum		val = SysCacheGetAttr(STATRELID, tuple,
-										  Anum_pg_statistic_stahival,
-										  &isnull);
+	/*
+	 * If VACUUM ANALYZE determined a scaled estimate, use it.
+	 */
+	if (stats && stats->stadistinct < 0.0)
+		return - stats->stadistinct * ntuples;
 
-		if (isnull)
-		{
-			elog(DEBUG, "getattstatistics: stahival is null");
-			*hival = PointerGetDatum(NULL);
-		}
-		else
-		{
-			char	   *strval = DatumGetCString(DirectFunctionCall1(textout,
-																   val));
-
-			*hival = FunctionCall3(&inputproc,
-								   CStringGetDatum(strval),
-								   ObjectIdGetDatum(typelem),
-								   Int32GetDatum(typmod));
-			pfree(strval);
-		}
+	/*
+	 * VACUUM ANALYZE does not compute stats for system attributes,
+	 * but some of them can reasonably be assumed unique anyway.
+	 */
+	switch (attnum)
+	{
+		case ObjectIdAttributeNumber:
+		case SelfItemPointerAttributeNumber:
+			return ntuples;
+		case TableOidAttributeNumber:
+			return 1.0;
 	}
 
-	ReleaseSysCache(tuple);
+	/*
+	 * Estimate ndistinct = ntuples if the table is small, else 100.
+	 */
+	if (ntuples < 100.0)
+		return ntuples;
 
-	return true;
+	return 100.0;
 }
 
 /*-------------------------------------------------------------------------
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index 82d55866215..3995de5d7a1 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.52 2001/03/23 04:49:55 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.53 2001/05/07 00:43:24 tgl Exp $
  *
  * NOTES
  *	  Eventually, the index information should go through here, too.
@@ -18,7 +18,10 @@
 #include "access/tupmacs.h"
 #include "catalog/pg_operator.h"
 #include "catalog/pg_proc.h"
+#include "catalog/pg_statistic.h"
 #include "catalog/pg_type.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
 #include "utils/lsyscache.h"
 #include "utils/syscache.h"
 
@@ -182,106 +185,6 @@ get_atttypmod(Oid relid, AttrNumber attnum)
 		return -1;
 }
 
-/*
- * get_attdispersion
- *
- *	  Retrieve the dispersion statistic for an attribute,
- *	  or produce an estimate if no info is available.
- *
- * min_estimate is the minimum estimate to return if insufficient data
- * is available to produce a reliable value.  This value may vary
- * depending on context.  (For example, when deciding whether it is
- * safe to use a hashjoin, we want to be more conservative than when
- * estimating the number of tuples produced by an equijoin.)
- */
-double
-get_attdispersion(Oid relid, AttrNumber attnum, double min_estimate)
-{
-	HeapTuple	atp;
-	Form_pg_attribute att_tup;
-	double		dispersion;
-	Oid			atttypid;
-	int32		ntuples;
-
-	atp = SearchSysCache(ATTNUM,
-						 ObjectIdGetDatum(relid),
-						 Int16GetDatum(attnum),
-						 0, 0);
-	if (!HeapTupleIsValid(atp))
-	{
-		/* this should not happen */
-		elog(ERROR, "get_attdispersion: no attribute tuple %u %d",
-			 relid, attnum);
-		return min_estimate;
-	}
-
-	att_tup = (Form_pg_attribute) GETSTRUCT(atp);
-
-	dispersion = att_tup->attdispersion;
-	atttypid = att_tup->atttypid;
-
-	ReleaseSysCache(atp);
-
-	if (dispersion > 0.0)
-		return dispersion;		/* we have a specific estimate from VACUUM */
-
-	/*
-	 * Special-case boolean columns: the dispersion of a boolean is highly
-	 * unlikely to be anywhere near 1/numtuples, instead it's probably
-	 * more like 0.5.
-	 *
-	 * Are there any other cases we should wire in special estimates for?
-	 */
-	if (atttypid == BOOLOID)
-		return 0.5;
-
-	/*
-	 * Dispersion is either 0 (no data available) or -1 (dispersion is
-	 * 1/numtuples).  Either way, we need the relation size.
-	 */
-
-	atp = SearchSysCache(RELOID,
-						 ObjectIdGetDatum(relid),
-						 0, 0, 0);
-	if (!HeapTupleIsValid(atp))
-	{
-		/* this should not happen */
-		elog(ERROR, "get_attdispersion: no relation tuple %u", relid);
-		return min_estimate;
-	}
-
-	ntuples = ((Form_pg_class) GETSTRUCT(atp))->reltuples;
-
-	ReleaseSysCache(atp);
-
-	if (ntuples == 0)
-		return min_estimate;	/* no data available */
-
-	if (dispersion < 0.0)		/* VACUUM thinks there are no duplicates */
-		return 1.0 / (double) ntuples;
-
-	/*
-	 * VACUUM ANALYZE does not compute dispersion for system attributes,
-	 * but some of them can reasonably be assumed unique anyway.
-	 */
-	if (attnum == ObjectIdAttributeNumber ||
-		attnum == SelfItemPointerAttributeNumber)
-		return 1.0 / (double) ntuples;
-	if (attnum == TableOidAttributeNumber)
-		return 1.0;
-
-	/*
-	 * VACUUM ANALYZE has not been run for this table. Produce an estimate
-	 * of 1/numtuples.  This may produce unreasonably small estimates for
-	 * large tables, so limit the estimate to no less than min_estimate.
-	 */
-	dispersion = 1.0 / (double) ntuples;
-	if (dispersion < min_estimate)
-		dispersion = min_estimate;
-
-	return dispersion;
-}
-
 /*				---------- INDEX CACHE ----------						 */
 
 /*		watch this space...
@@ -876,3 +779,157 @@ get_typtype(Oid typid)
 }
 
 #endif
+
+/*				---------- STATISTICS CACHE ----------					 */
+
+/*
+ * get_attstatsslot
+ *
+ *		Extract the contents of a "slot" of a pg_statistic tuple.
+ *		Returns TRUE if requested slot type was found, else FALSE.
+ *
+ * Unlike other routines in this file, this takes a pointer to an
+ * already-looked-up tuple in the pg_statistic cache.  We do this since
+ * most callers will want to extract more than one value from the cache
+ * entry, and we don't want to repeat the cache lookup unnecessarily.
+ *
+ * statstuple: pg_statistic tuple to be examined.
+ * atttype: type OID of attribute.
+ * atttypmod: typmod of attribute.
+ * reqkind: STAKIND code for desired statistics slot kind.
+ * reqop: STAOP value wanted, or InvalidOid if don't care.
+ * values, nvalues: if not NULL, the slot's stavalues are extracted.
+ * numbers, nnumbers: if not NULL, the slot's stanumbers are extracted.
+ *
+ * If assigned, values and numbers are set to point to palloc'd arrays.
+ * If the attribute type is pass-by-reference, the values referenced by
+ * the values array are themselves palloc'd.  The palloc'd stuff can be
+ * freed by calling free_attstatsslot.
+ */
+bool
+get_attstatsslot(HeapTuple statstuple,
+				 Oid atttype, int32 atttypmod,
+				 int reqkind, Oid reqop,
+				 Datum **values, int *nvalues,
+				 float4 **numbers, int *nnumbers)
+{
+	Form_pg_statistic stats = (Form_pg_statistic) GETSTRUCT(statstuple);
+	int			i,
+				j;
+	Datum		val;
+	bool		isnull;
+	ArrayType  *statarray;
+	int			narrayelem;
+	HeapTuple	typeTuple;
+	FmgrInfo	inputproc;
+	Oid			typelem;
+
+	for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
+	{
+		if ((&stats->stakind1)[i] == reqkind &&
+			(reqop == InvalidOid || (&stats->staop1)[i] == reqop))
+			break;
+	}
+	if (i >= STATISTIC_NUM_SLOTS)
+		return false;			/* not there */
+
+	if (values)
+	{
+		val = SysCacheGetAttr(STATRELATT, statstuple,
+							  Anum_pg_statistic_stavalues1 + i,
+							  &isnull);
+		if (isnull)
+			elog(ERROR, "get_attstatsslot: stavalues is null");
+		statarray = DatumGetArrayTypeP(val);
+		/*
+		 * Do initial examination of the array.  This produces an array
+		 * of text Datums --- i.e., pointers into the text array value.
+		 */
+		deconstruct_array(statarray, false, -1, 'i', values, nvalues);
+		narrayelem = *nvalues;
+		/*
+		 * We now need to replace each text Datum by its internal equivalent.
+		 *
+		 * Get the type input proc and typelem for the column datatype.
+		 */
+		typeTuple = SearchSysCache(TYPEOID,
+								   ObjectIdGetDatum(atttype),
+								   0, 0, 0);
+		if (!HeapTupleIsValid(typeTuple))
+			elog(ERROR, "get_attstatsslot: Cache lookup failed for type %u",
+				 atttype);
+		fmgr_info(((Form_pg_type) GETSTRUCT(typeTuple))->typinput, &inputproc);
+		typelem = ((Form_pg_type) GETSTRUCT(typeTuple))->typelem;
+		ReleaseSysCache(typeTuple);
+		/*
+		 * Do the conversions.  The palloc'd array of Datums is reused
+		 * in place.
+		 */
+		for (j = 0; j < narrayelem; j++)
+		{
+			char	   *strval;
+
+			strval = DatumGetCString(DirectFunctionCall1(textout,
+														 (*values)[j]));
+			(*values)[j] = FunctionCall3(&inputproc,
+										 CStringGetDatum(strval),
+										 ObjectIdGetDatum(typelem),
+										 Int32GetDatum(atttypmod));
+			pfree(strval);
+		}
+		/*
+		 * Free statarray if it's a detoasted copy.
+		 */
+		if ((Pointer) statarray != DatumGetPointer(val))
+			pfree(statarray);
+	}
+
+	if (numbers)
+	{
+		val = SysCacheGetAttr(STATRELATT, statstuple,
+							  Anum_pg_statistic_stanumbers1 + i,
+							  &isnull);
+		if (isnull)
+			elog(ERROR, "get_attstatsslot: stanumbers is null");
+		statarray = DatumGetArrayTypeP(val);
+		/*
+		 * We expect the array to be a 1-D float4 array; verify that.
+		 * We don't need to use deconstruct_array() since the array
+		 * data is just going to look like a C array of float4 values.
+		 */
+		narrayelem = ARR_DIMS(statarray)[0];
+		if (ARR_NDIM(statarray) != 1 || narrayelem <= 0 ||
+			ARR_SIZE(statarray) != (ARR_OVERHEAD(1) + narrayelem * sizeof(float4)))
+			elog(ERROR, "get_attstatsslot: stanumbers is bogus");
+		*numbers = (float4 *) palloc(narrayelem * sizeof(float4));
+		memcpy(*numbers, ARR_DATA_PTR(statarray), narrayelem * sizeof(float4));
+		*nnumbers = narrayelem;
+		/*
+		 * Free statarray if it's a detoasted copy.
+		 */
+		if ((Pointer) statarray != DatumGetPointer(val))
+			pfree(statarray);
+	}
+
+	return true;
+}
+
+void
+free_attstatsslot(Oid atttype,
+				  Datum *values, int nvalues,
+				  float4 *numbers, int nnumbers)
+{
+	if (values)
+	{
+		if (! get_typbyval(atttype))
+		{
+			int		i;
+
+			for (i = 0; i < nvalues; i++)
+				pfree(DatumGetPointer(values[i]));
+		}
+		pfree(values);
+	}
+	if (numbers)
+		pfree(numbers);
+}
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index 75ef3179202..4e35b3fb35b 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/utils/cache/syscache.c,v 1.60 2001/03/22 03:59:57 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/utils/cache/syscache.c,v 1.61 2001/05/07 00:43:24 tgl Exp $
  *
  * NOTES
  *	  These routines allow the parser/planner/executor to perform
@@ -313,7 +313,7 @@ static struct cachedesc cacheinfo[] = {
 			0,
 			0
 	}},
-	{StatisticRelationName,		/* STATRELID */
+	{StatisticRelationName,		/* STATRELATT */
 		StatisticRelidAttnumIndex,
 		2,
 		{
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index d27bfb29668..5a77c47c200 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -78,7 +78,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v 1.15 2001/03/23 04:49:55 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v 1.16 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -87,7 +87,11 @@
 
 #include "access/heapam.h"
 #include "access/nbtree.h"
+#include "catalog/catname.h"
+#include "catalog/pg_amop.h"
+#include "catalog/pg_amproc.h"
 #include "miscadmin.h"
+#include "utils/fmgroids.h"
 #include "utils/logtape.h"
 #include "utils/lsyscache.h"
 #include "utils/tuplesort.h"
@@ -263,6 +267,7 @@ struct Tuplesortstate
 	TupleDesc	tupDesc;
 	int			nKeys;
 	ScanKey		scanKeys;
+	SortFunctionKind *sortFnKinds;
 
 	/*
 	 * These variables are specific to the IndexTuple case; they are set
@@ -279,6 +284,7 @@ struct Tuplesortstate
 	Oid			datumType;
 	Oid			sortOperator;
 	FmgrInfo	sortOpFn;		/* cached lookup data for sortOperator */
+	SortFunctionKind sortFnKind;
 	/* we need typelen and byval in order to know how to copy the Datums. */
 	int			datumTypeLen;
 	bool		datumTypeByVal;
@@ -458,14 +464,14 @@ tuplesort_begin_common(bool randomAccess)
 
 Tuplesortstate *
 tuplesort_begin_heap(TupleDesc tupDesc,
-					 int nkeys, ScanKey keys,
+					 int nkeys,
+					 Oid *sortOperators, AttrNumber *attNums,
 					 bool randomAccess)
 {
 	Tuplesortstate *state = tuplesort_begin_common(randomAccess);
+	int			i;
 
-	AssertArg(nkeys >= 1);
-	AssertArg(keys[0].sk_attno != 0);
-	AssertArg(keys[0].sk_procedure != 0);
+	AssertArg(nkeys > 0);
 
 	state->comparetup = comparetup_heap;
 	state->copytup = copytup_heap;
@@ -475,7 +481,29 @@ tuplesort_begin_heap(TupleDesc tupDesc,
 
 	state->tupDesc = tupDesc;
 	state->nKeys = nkeys;
-	state->scanKeys = keys;
+	state->scanKeys = (ScanKey) palloc(nkeys * sizeof(ScanKeyData));
+	MemSet(state->scanKeys, 0, nkeys * sizeof(ScanKeyData));
+	state->sortFnKinds = (SortFunctionKind *)
+		palloc(nkeys * sizeof(SortFunctionKind));
+	MemSet(state->sortFnKinds, 0, nkeys * sizeof(SortFunctionKind));
+
+	for (i = 0; i < nkeys; i++)
+	{
+		RegProcedure sortFunction;
+
+		AssertArg(sortOperators[i] != 0);
+		AssertArg(attNums[i] != 0);
+
+		/* select a function that implements the sort operator */
+		SelectSortFunction(sortOperators[i], &sortFunction,
+						   &state->sortFnKinds[i]);
+
+		ScanKeyEntryInitialize(&state->scanKeys[i],
+							   0x0,
+							   attNums[i],
+							   sortFunction,
+							   (Datum) 0);
+	}
 
 	return state;
 }
@@ -507,6 +535,7 @@ tuplesort_begin_datum(Oid datumType,
 					  bool randomAccess)
 {
 	Tuplesortstate *state = tuplesort_begin_common(randomAccess);
+	RegProcedure sortFunction;
 	int16		typlen;
 	bool		typbyval;
 
@@ -518,8 +547,12 @@ tuplesort_begin_datum(Oid datumType,
 
 	state->datumType = datumType;
 	state->sortOperator = sortOperator;
-	/* lookup the function that implements the sort operator */
-	fmgr_info(get_opcode(sortOperator), &state->sortOpFn);
+
+	/* select a function that implements the sort operator */
+	SelectSortFunction(sortOperator, &sortFunction, &state->sortFnKind);
+	/* and look up the function */
+	fmgr_info(sortFunction, &state->sortOpFn);
+
 	/* lookup necessary attributes of the datum type */
 	get_typlenbyval(datumType, &typlen, &typbyval);
 	state->datumTypeLen = typlen;
@@ -548,6 +581,13 @@ tuplesort_end(Tuplesortstate *state)
 	}
 	if (state->memtupindex)
 		pfree(state->memtupindex);
+
+	/* this stuff might better belong in a variant-specific shutdown routine */
+	if (state->scanKeys)
+		pfree(state->scanKeys);
+	if (state->sortFnKinds)
+		pfree(state->sortFnKinds);
+
 	pfree(state);
 }
 
@@ -1692,6 +1732,7 @@ comparetup_heap(Tuplesortstate *state, const void *a, const void *b)
 	for (nkey = 0; nkey < state->nKeys; nkey++)
 	{
 		ScanKey		scanKey = state->scanKeys + nkey;
+		SortFunctionKind fnKind = state->sortFnKinds[nkey];
 		AttrNumber	attno = scanKey->sk_attno;
 		Datum		lattr,
 					rattr;
@@ -1708,23 +1749,36 @@ comparetup_heap(Tuplesortstate *state, const void *a, const void *b)
 		}
 		else if (isnull2)
 			return -1;
-		else if (scanKey->sk_flags & SK_COMMUTE)
-		{
-			if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-										   rattr, lattr)))
-				return -1;		/* a < b after commute */
-			if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-										   lattr, rattr)))
-				return 1;		/* a > b after commute */
-		}
 		else
 		{
-			if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-										   lattr, rattr)))
-				return -1;		/* a < b */
-			if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-										   rattr, lattr)))
-				return 1;		/* a > b */
+			int32		compare;
+
+			if (fnKind == SORTFUNC_LT)
+			{
+				if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
+											   lattr, rattr)))
+					compare = -1;	/* a < b */
+				else if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
+													rattr, lattr)))
+					compare = 1;	/* a > b */
+				else
+					compare = 0;
+			}
+			else
+			{
+				/* sort function is CMP or REVCMP */
+				compare = DatumGetInt32(FunctionCall2(&scanKey->sk_func,
+													  lattr, rattr));
+				if (fnKind == SORTFUNC_REVCMP)
+					compare = -compare;
+			}
+
+			if (compare != 0)
+			{
+				if (scanKey->sk_flags & SK_COMMUTE)
+					compare = -compare;
+				return compare;
+			}
 		}
 	}
 
@@ -1852,8 +1906,10 @@ comparetup_index(Tuplesortstate *state, const void *a, const void *b)
 		}
 		else
 		{
+			/* the comparison function is always of CMP type */
 			compare = DatumGetInt32(FunctionCall2(&entry->sk_func,
-												attrDatum1, attrDatum2));
+												  attrDatum1,
+												  attrDatum2));
 		}
 
 		if (compare != 0)
@@ -1954,7 +2010,7 @@ comparetup_datum(Tuplesortstate *state, const void *a, const void *b)
 	}
 	else if (rtup->isNull)
 		return -1;
-	else
+	else if (state->sortFnKind == SORTFUNC_LT)
 	{
 		if (DatumGetBool(FunctionCall2(&state->sortOpFn,
 									   ltup->val, rtup->val)))
@@ -1964,6 +2020,17 @@ comparetup_datum(Tuplesortstate *state, const void *a, const void *b)
 			return 1;			/* a > b */
 		return 0;
 	}
+	else
+	{
+		/* sort function is CMP or REVCMP */
+		int32	compare;
+
+		compare = DatumGetInt32(FunctionCall2(&state->sortOpFn,
+											  ltup->val, rtup->val));
+		if (state->sortFnKind == SORTFUNC_REVCMP)
+			compare = -compare;
+		return compare;
+	}
 }
 
 static void *
@@ -2032,3 +2099,119 @@ tuplesize_datum(Tuplesortstate *state, void *tup)
 		return (unsigned int) tuplelen;
 	}
 }
+
+
+/*
+ * This routine selects an appropriate sorting function to implement
+ * a sort operator as efficiently as possible.  The straightforward
+ * method is to use the operator's implementation proc --- ie, "<"
+ * comparison.  However, that way often requires two calls of the function
+ * per comparison.  If we can find a btree three-way comparator function
+ * associated with the operator, we can use it to do the comparisons
+ * more efficiently.  We also support the possibility that the operator
+ * is ">" (descending sort), in which case we have to reverse the output
+ * of the btree comparator.
+ *
+ * Possibly this should live somewhere else (backend/catalog/, maybe?).
+ */
+void
+SelectSortFunction(Oid sortOperator,
+				   RegProcedure *sortFunction,
+				   SortFunctionKind *kind)
+{
+	Relation	relation;
+	HeapScanDesc scan;
+	ScanKeyData skey[3];
+	HeapTuple	tuple;
+	Oid			opclass = InvalidOid;
+
+	/*
+	 * Scan pg_amop to see if the target operator is registered as the
+	 * "<" or ">" operator of any btree opclass.  It's possible that it
+	 * might be registered both ways (eg, if someone were to build a
+	 * "reverse sort" opclass for some reason); prefer the "<" case if so.
+	 * If the operator is registered the same way in multiple opclasses,
+	 * assume we can use the associated comparator function from any one.
+	 */
+	relation = heap_openr(AccessMethodOperatorRelationName,
+						  AccessShareLock);
+
+	ScanKeyEntryInitialize(&skey[0], 0,
+						   Anum_pg_amop_amopid,
+						   F_OIDEQ,
+						   ObjectIdGetDatum(BTREE_AM_OID));
+
+	ScanKeyEntryInitialize(&skey[1], 0,
+						   Anum_pg_amop_amopopr,
+						   F_OIDEQ,
+						   ObjectIdGetDatum(sortOperator));
+
+	scan = heap_beginscan(relation, false, SnapshotNow, 2, skey);
+
+	while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+	{
+		Form_pg_amop aform = (Form_pg_amop) GETSTRUCT(tuple);
+
+		if (aform->amopstrategy == BTLessStrategyNumber)
+		{
+			opclass = aform->amopclaid;
+			*kind = SORTFUNC_CMP;
+			break;				/* done looking */
+		}
+		else if (aform->amopstrategy == BTGreaterStrategyNumber)
+		{
+			opclass = aform->amopclaid;
+			*kind = SORTFUNC_REVCMP;
+			/* keep scanning in hopes of finding a BTLess entry */
+		}
+	}
+
+	heap_endscan(scan);
+	heap_close(relation, AccessShareLock);
+
+	if (OidIsValid(opclass))
+	{
+		/* Found a suitable opclass, get its comparator support function */
+		relation = heap_openr(AccessMethodProcedureRelationName,
+							  AccessShareLock);
+
+		ScanKeyEntryInitialize(&skey[0], 0,
+							   Anum_pg_amproc_amid,
+							   F_OIDEQ,
+							   ObjectIdGetDatum(BTREE_AM_OID));
+
+		ScanKeyEntryInitialize(&skey[1], 0,
+							   Anum_pg_amproc_amopclaid,
+							   F_OIDEQ,
+							   ObjectIdGetDatum(opclass));
+
+		ScanKeyEntryInitialize(&skey[2], 0,
+							   Anum_pg_amproc_amprocnum,
+							   F_INT2EQ,
+							   Int16GetDatum(BTORDER_PROC));
+
+		scan = heap_beginscan(relation, false, SnapshotNow, 3, skey);
+
+		*sortFunction = InvalidOid;
+
+		if (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+		{
+			Form_pg_amproc aform = (Form_pg_amproc) GETSTRUCT(tuple);
+			*sortFunction = aform->amproc;
+		}
+
+		heap_endscan(scan);
+		heap_close(relation, AccessShareLock);
+
+		if (RegProcedureIsValid(*sortFunction))
+			return;
+	}
+
+	/* Can't find a comparator, so use the operator as-is */
+
+	*kind = SORTFUNC_LT;
+	*sortFunction = get_opcode(sortOperator);
+	if (!RegProcedureIsValid(*sortFunction))
+		elog(ERROR, "SelectSortFunction: operator %u has no implementation",
+			 sortOperator);
+}
diff --git a/src/include/access/tuptoaster.h b/src/include/access/tuptoaster.h
index 759ab3d39e2..6e38529204d 100644
--- a/src/include/access/tuptoaster.h
+++ b/src/include/access/tuptoaster.h
@@ -6,15 +6,13 @@
  *
  * Copyright (c) 2000, PostgreSQL Development Team
  *
- * $Id: tuptoaster.h,v 1.10 2001/03/22 04:00:32 momjian Exp $
+ * $Id: tuptoaster.h,v 1.11 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #ifndef TUPTOASTER_H
 #define TUPTOASTER_H
 
-#ifdef TUPLE_TOASTER_ACTIVE
-
 #include "access/heapam.h"
 #include "access/htup.h"
 #include "access/tupmacs.h"
@@ -109,7 +107,13 @@ extern varattrib *heap_tuple_untoast_attr(varattrib *attr);
  */
 extern Datum toast_compress_datum(Datum value);
 
-#endif	 /* TUPLE_TOASTER_ACTIVE */
+/* ----------
+ * toast_raw_datum_size -
+ *
+ *	Return the raw (detoasted) size of a varlena datum
+ * ----------
+ */
+extern Size toast_raw_datum_size(Datum value);
 
 
 #endif	 /* TUPTOASTER_H */
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 963b11c1d38..832f91fb09f 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: catversion.h,v 1.70 2001/03/22 04:00:35 momjian Exp $
+ * $Id: catversion.h,v 1.71 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	200101061
+#define CATALOG_VERSION_NO	200105051
 
 #endif
diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h
index a7248f6c6dc..7ab04b05fb2 100644
--- a/src/include/catalog/heap.h
+++ b/src/include/catalog/heap.h
@@ -7,13 +7,14 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: heap.h,v 1.34 2001/03/22 04:00:35 momjian Exp $
+ * $Id: heap.h,v 1.35 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #ifndef HEAP_H
 #define HEAP_H
 
+#include "catalog/pg_attribute.h"
 #include "utils/rel.h"
 
 typedef struct RawColumnDefault
@@ -44,4 +45,6 @@ extern void AddRelationRawConstraints(Relation rel,
 						  List *rawColDefaults,
 						  List *rawConstraints);
 
+extern Form_pg_attribute SystemAttributeDefinition(AttrNumber attno);
+
 #endif	 /* HEAP_H */
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index 1dac0bb1c31..07aaad61c79 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: index.h,v 1.33 2001/03/22 04:00:35 momjian Exp $
+ * $Id: index.h,v 1.34 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -46,7 +46,7 @@ extern void FormIndexDatum(IndexInfo *indexInfo,
 			   Datum *datum,
 			   char *nullv);
 
-extern void UpdateStats(Oid relid, long reltuples);
+extern void UpdateStats(Oid relid, double reltuples);
 extern bool IndexesAreActive(Oid relid, bool comfirmCommitted);
 extern void setRelhasindex(Oid relid, bool hasindex);
 
diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h
index 41a580a3777..cc155cf1bbb 100644
--- a/src/include/catalog/indexing.h
+++ b/src/include/catalog/indexing.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: indexing.h,v 1.48 2001/03/22 04:00:36 momjian Exp $
+ * $Id: indexing.h,v 1.49 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -171,7 +171,7 @@ DECLARE_UNIQUE_INDEX(pg_rewrite_rulename_index on pg_rewrite using btree(rulenam
 xDECLARE_UNIQUE_INDEX(pg_shadow_name_index on pg_shadow using btree(usename name_ops));
 xDECLARE_UNIQUE_INDEX(pg_shadow_sysid_index on pg_shadow using btree(usesysid int4_ops));
 */
-DECLARE_INDEX(pg_statistic_relid_att_index on pg_statistic using btree(starelid oid_ops, staattnum int2_ops));
+DECLARE_UNIQUE_INDEX(pg_statistic_relid_att_index on pg_statistic using btree(starelid oid_ops, staattnum int2_ops));
 DECLARE_INDEX(pg_trigger_tgconstrname_index on pg_trigger using btree(tgconstrname name_ops));
 DECLARE_INDEX(pg_trigger_tgconstrrelid_index on pg_trigger using btree(tgconstrrelid oid_ops));
 DECLARE_INDEX(pg_trigger_tgrelid_index on pg_trigger using btree(tgrelid oid_ops));
diff --git a/src/include/catalog/pg_attribute.h b/src/include/catalog/pg_attribute.h
index 58724e94dc9..6e11aa6d530 100644
--- a/src/include/catalog/pg_attribute.h
+++ b/src/include/catalog/pg_attribute.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_attribute.h,v 1.70 2001/03/22 04:00:37 momjian Exp $
+ * $Id: pg_attribute.h,v 1.71 2001/05/07 00:43:24 tgl Exp $
  *
  * NOTES
  *	  the genbki.sh script reads this file and generates .bki
@@ -36,15 +36,14 @@
  *		typedef struct FormData_pg_attribute
  *
  *		If you change the following, make sure you change the structs for
- *		system attributes in heap.c and index.c also.
+ *		system attributes in catalog/heap.c also.
  * ----------------
  */
 CATALOG(pg_attribute) BOOTSTRAP
 {
 	Oid			attrelid;		/* OID of relation containing this
 								 * attribute */
-	NameData	attname;
-	Oid			atttypid;
+	NameData	attname;		/* name of attribute */
 
 	/*
 	 * atttypid is the OID of the instance in Catalog Class pg_type that
@@ -53,30 +52,20 @@ CATALOG(pg_attribute) BOOTSTRAP
 	 * attalign attributes of this instance, so they had better match or
 	 * Postgres will fail.
 	 */
-
-	float4		attdispersion;
+	Oid			atttypid;
 
 	/*
-	 * attdispersion is the dispersion statistic of the column (0.0 to
-	 * 1.0), or zero if the statistic has not been calculated, or -1.0 if
-	 * VACUUM found that the column contains no duplicate entries (in
-	 * which case the dispersion should be taken as 1.0/numberOfRows for
-	 * the current table size).  The -1.0 hack is useful because the
-	 * number of rows may be updated more often than attdispersion is. We
-	 * assume that the column will retain its no-duplicate-entry property.
-	 * (Perhaps this should be driven off the existence of a UNIQUE index
-	 * for the column, instead of being a statistical guess?)
+	 * attstattarget is the target number of statistics datapoints to collect
+	 * during VACUUM ANALYZE of this column.  A zero here indicates that we
+	 * do not wish to collect any stats about this column.
 	 */
-
-	int2		attlen;
+	int4		attstattarget;
 
 	/*
 	 * attlen is a copy of the typlen field from pg_type for this
-	 * attribute.  See atttypid above.	See struct Form_pg_type for
-	 * definition.
+	 * attribute.  See atttypid comments above.
 	 */
-
-	int2		attnum;
+	int2		attlen;
 
 	/*
 	 * attnum is the "attribute number" for the attribute:	A value that
@@ -91,10 +80,13 @@ CATALOG(pg_attribute) BOOTSTRAP
 	 *
 	 * Note that (attnum - 1) is often used as the index to an array.
 	 */
+	int2		attnum;
 
-	int4		attnelems;		/* number of dimensions, if an array type */
-
-	int4		attcacheoff;
+	/*
+	 * attndims is the declared number of dimensions, if an array type,
+	 * otherwise zero.
+	 */
+	int4		attndims;
 
 	/*
 	 * fastgetattr() uses attcacheoff to cache byte offsets of attributes
@@ -103,8 +95,7 @@ CATALOG(pg_attribute) BOOTSTRAP
 	 * tuple descriptor, we may then update attcacheoff in the copies.
 	 * This speeds up the attribute walking process.
 	 */
-
-	int4		atttypmod;
+	int4		attcacheoff;
 
 	/*
 	 * atttypmod records type-specific data supplied at table creation
@@ -113,16 +104,13 @@ CATALOG(pg_attribute) BOOTSTRAP
 	 * argument. The value will generally be -1 for types that do not need
 	 * typmod.
 	 */
-
-	bool		attbyval;
+	int4		atttypmod;
 
 	/*
 	 * attbyval is a copy of the typbyval field from pg_type for this
-	 * attribute.  See atttypid above.	See struct Form_pg_type for
-	 * definition.
+	 * attribute.  See atttypid comments above.
 	 */
-
-	char		attstorage;
+	bool		attbyval;
 
 	/*----------
 	 * attstorage tells for VARLENA attributes, what the heap access
@@ -137,30 +125,31 @@ CATALOG(pg_attribute) BOOTSTRAP
 	 * but only as a last resort ('e' and 'x' fields are moved first).
 	 *----------
 	 */
+	char		attstorage;
 
+	/* This flag indicates that the attribute is really a set */
 	bool		attisset;
-	char		attalign;
 
 	/*
 	 * attalign is a copy of the typalign field from pg_type for this
-	 * attribute.  See atttypid above.	See struct Form_pg_type for
-	 * definition.
+	 * attribute.  See atttypid comments above.
 	 */
-
-	bool		attnotnull;
+	char		attalign;
 
 	/* This flag represents the "NOT NULL" constraint */
-	bool		atthasdef;
+	bool		attnotnull;
 
 	/* Has DEFAULT value or not */
+	bool		atthasdef;
 } FormData_pg_attribute;
 
 /*
  * someone should figure out how to do this properly. (The problem is
- * the size of the C struct is not the same as the size of the tuple.)
+ * the size of the C struct is not the same as the size of the tuple
+ * because of alignment padding at the end of the struct.)
  */
 #define ATTRIBUTE_TUPLE_SIZE \
-	(offsetof(FormData_pg_attribute,atthasdef) + sizeof(char))
+	(offsetof(FormData_pg_attribute,atthasdef) + sizeof(bool))
 
 /* ----------------
  *		Form_pg_attribute corresponds to a pointer to a tuple with
@@ -178,10 +167,10 @@ typedef FormData_pg_attribute *Form_pg_attribute;
 #define Anum_pg_attribute_attrelid		1
 #define Anum_pg_attribute_attname		2
 #define Anum_pg_attribute_atttypid		3
-#define Anum_pg_attribute_attdispersion 4
+#define Anum_pg_attribute_attstattarget 4
 #define Anum_pg_attribute_attlen		5
 #define Anum_pg_attribute_attnum		6
-#define Anum_pg_attribute_attnelems		7
+#define Anum_pg_attribute_attndims		7
 #define Anum_pg_attribute_attcacheoff	8
 #define Anum_pg_attribute_atttypmod		9
 #define Anum_pg_attribute_attbyval		10
@@ -206,6 +195,7 @@ typedef FormData_pg_attribute *Form_pg_attribute;
 	(attribute)->attnotnull = false; \
 	(attribute)->atthasdef = false;
 #endif	 /* _DROP_COLUMN_HACK__ */
+
 /* ----------------
  *		SCHEMA_ macros for declaring hardcoded tuple descriptors.
  *		these are used in utils/cache/relcache.c
@@ -231,25 +221,25 @@ typedef FormData_pg_attribute *Form_pg_attribute;
  * ----------------
  */
 #define Schema_pg_type \
-{ 1247, {"typname"},	   19, 0, NAMEDATALEN,	1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typowner"},	   23, 0,	4,	2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typlen"},		   21, 0,	2,	3, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1247, {"typprtlen"},	   21, 0,	2,	4, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1247, {"typbyval"},	   16, 0,	1,	5, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typtype"},	   18, 0,	1,	6, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typisdefined"},  16, 0,	1,	7, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typdelim"},	   18, 0,	1,	8, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typrelid"},	   26, 0,	4,	9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typelem"},	   26, 0,	4, 10, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typinput"},	   24, 0,	4, 11, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typoutput"},	   24, 0,	4, 12, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typreceive"},    24, 0,	4, 13, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typsend"},	   24, 0,	4, 14, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typalign"},	   18, 0,	1, 15, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typstorage"},    18, 0,	1, 16, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typdefault"},    25, 0,  -1, 17, 0, -1, -1, '\0'	, 'x', '\0', 'i', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1247 typname			19 0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
+{ 1247, {"typname"},	   19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,	1, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1247, {"typowner"},	   23, 0,	4,	2, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typlen"},		   21, 0,	2,	3, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1247, {"typprtlen"},	   21, 0,	2,	4, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1247, {"typbyval"},	   16, 0,	1,	5, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typtype"},	   18, 0,	1,	6, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typisdefined"},  16, 0,	1,	7, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typdelim"},	   18, 0,	1,	8, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typrelid"},	   26, 0,	4,	9, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typelem"},	   26, 0,	4, 10, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typinput"},	   24, 0,	4, 11, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typoutput"},	   24, 0,	4, 12, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typreceive"},    24, 0,	4, 13, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typsend"},	   24, 0,	4, 14, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typalign"},	   18, 0,	1, 15, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typstorage"},    18, 0,	1, 16, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typdefault"},    25, 0,  -1, 17, 0, -1, -1, false	, 'x', false, 'i', false, false }
+
+DATA(insert OID = 0 ( 1247 typname			19 DEFAULT_ATTSTATTARGET NAMEDATALEN   1 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1247 typowner			23 0  4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1247 typlen			21 0  2   3 0 -1 -1 t p f s f f));
 DATA(insert OID = 0 ( 1247 typprtlen		21 0  2   4 0 -1 -1 t p f s f f));
@@ -299,25 +289,25 @@ DATA(insert OID = 0 ( 1262 tableoid			26 0  4  -7 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_proc \
-{ 1255, {"proname"},			19, 0, NAMEDATALEN,  1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"proowner"},			23, 0,	4,	2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"prolang"},			26, 0,	4,	3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"proisinh"},			16, 0,	1,	4, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"proistrusted"},		16, 0,	1,	5, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"proiscachable"},		16, 0,	1,	6, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"proisstrict"},		16, 0,	1,	7, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"pronargs"},			21, 0,	2,	8, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1255, {"proretset"},			16, 0,	1,	9, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"prorettype"},			26, 0,	4, 10, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"proargtypes"},		30, 0, INDEX_MAX_KEYS*4, 11, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"probyte_pct"},		23, 0,	4, 12, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"properbyte_cpu"},		23, 0,	4, 13, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"propercall_cpu"},		23, 0,	4, 14, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"prooutin_ratio"},		23, 0,	4, 15, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"prosrc"},				25, 0, -1, 16, 0, -1, -1, '\0', 'x', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"probin"},				17, 0, -1, 17, 0, -1, -1, '\0', 'x', '\0', 'i', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1255 proname			19 0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
+{ 1255, {"proname"},			19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,  1, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1255, {"proowner"},			23, 0,	4,	2, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"prolang"},			26, 0,	4,	3, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"proisinh"},			16, 0,	1,	4, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"proistrusted"},		16, 0,	1,	5, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"proiscachable"},		16, 0,	1,	6, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"proisstrict"},		16, 0,	1,	7, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"pronargs"},			21, 0,	2,	8, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1255, {"proretset"},			16, 0,	1,	9, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"prorettype"},			26, 0,	4, 10, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"proargtypes"},		30, 0, INDEX_MAX_KEYS*4, 11, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1255, {"probyte_pct"},		23, 0,	4, 12, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"properbyte_cpu"},		23, 0,	4, 13, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"propercall_cpu"},		23, 0,	4, 14, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"prooutin_ratio"},		23, 0,	4, 15, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"prosrc"},				25, 0, -1, 16, 0, -1, -1, false, 'x', false, 'i', false, false }, \
+{ 1255, {"probin"},				17, 0, -1, 17, 0, -1, -1, false, 'x', false, 'i', false, false }
+
+DATA(insert OID = 0 ( 1255 proname			19 DEFAULT_ATTSTATTARGET NAMEDATALEN   1 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1255 proowner			23 0  4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1255 prolang			26 0  4   3 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1255 proisinh			16 0  1   4 0 -1 -1 t p f c f f));
@@ -346,8 +336,8 @@ DATA(insert OID = 0 ( 1255 tableoid			26 0  4  -7 0 -1 -1 t p f i f f));
  *		pg_shadow
  * ----------------
  */
-DATA(insert OID = 0 ( 1260 usename			19	0 NAMEDATALEN	1 0 -1 -1 f p f i f f));
-DATA(insert OID = 0 ( 1260 usesysid			23	0	4	2 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1260 usename			19	DEFAULT_ATTSTATTARGET NAMEDATALEN	1 0 -1 -1 f p f i f f));
+DATA(insert OID = 0 ( 1260 usesysid			23	DEFAULT_ATTSTATTARGET	4	2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1260 usecreatedb		16	0	1	3 0 -1 -1 t p f c f f));
 DATA(insert OID = 0 ( 1260 usetrace			16	0	1	4 0 -1 -1 t p f c f f));
 DATA(insert OID = 0 ( 1260 usesuper			16	0	1	5 0 -1 -1 t p f c f f));
@@ -366,8 +356,8 @@ DATA(insert OID = 0 ( 1260 tableoid			26 0  4  -7 0 -1 -1 t p f i f f));
  *		pg_group
  * ----------------
  */
-DATA(insert OID = 0 ( 1261 groname			19 0 NAMEDATALEN  1 0 -1 -1 f p f i f f));
-DATA(insert OID = 0 ( 1261 grosysid			23 0  4   2 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1261 groname			19 DEFAULT_ATTSTATTARGET NAMEDATALEN  1 0 -1 -1 f p f i f f));
+DATA(insert OID = 0 ( 1261 grosysid			23 DEFAULT_ATTSTATTARGET  4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1261 grolist		  1007 0 -1   3 0 -1 -1 f x f i f f));
 DATA(insert OID = 0 ( 1261 ctid				27 0  6  -1 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1261 oid				26 0  4  -2 0 -1 -1 t p f i f f));
@@ -382,29 +372,29 @@ DATA(insert OID = 0 ( 1261 tableoid			26 0  4  -7 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_attribute \
-{ 1249, {"attrelid"},	  26, 0,	4,	1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attname"},	  19, 0, NAMEDATALEN,	2, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"atttypid"},	  26, 0,	4,	3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attdispersion"}, 700, 0,	4,	4, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attlen"},		  21, 0,	2,	5, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1249, {"attnum"},		  21, 0,	2,	6, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1249, {"attnelems"},	  23, 0,	4,	7, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attcacheoff"},  23, 0,	4,	8, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"atttypmod"},	  23, 0,	4,	9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attbyval"},	  16, 0,	1, 10, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attstorage"},   18, 0,	1, 11, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attisset"},	  16, 0,	1, 12, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attalign"},	  18, 0,	1, 13, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attnotnull"},  16, 0, 1, 14, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"atthasdef"},	 16, 0, 1, 15, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1249 attrelid			26 0  4   1 0 -1 -1 t p f i f f));
-DATA(insert OID = 0 ( 1249 attname			19 0 NAMEDATALEN  2 0 -1 -1 f p f i f f));
+{ 1249, {"attrelid"},	  26, DEFAULT_ATTSTATTARGET,	4,	1, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attname"},	  19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,	2, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1249, {"atttypid"},	  26, 0,	4,	3, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attstattarget"}, 23, 0,	4,	4, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attlen"},		  21, 0,	2,	5, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1249, {"attnum"},		  21, 0,	2,	6, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1249, {"attndims"},	  23, 0,	4,	7, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attcacheoff"},  23, 0,	4,	8, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"atttypmod"},	  23, 0,	4,	9, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attbyval"},	  16, 0,	1, 10, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attstorage"},   18, 0,	1, 11, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attisset"},	  16, 0,	1, 12, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attalign"},	  18, 0,	1, 13, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attnotnull"},  16, 0, 1, 14, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"atthasdef"},	 16, 0, 1, 15, 0, -1, -1, true, 'p', false, 'c', false, false }
+
+DATA(insert OID = 0 ( 1249 attrelid			26 DEFAULT_ATTSTATTARGET  4   1 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1249 attname			19 DEFAULT_ATTSTATTARGET NAMEDATALEN  2 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1249 atttypid			26 0  4   3 0 -1 -1 t p f i f f));
-DATA(insert OID = 0 ( 1249 attdispersion   700 0  4   4 0 -1 -1 f p f i f f));
+DATA(insert OID = 0 ( 1249 attstattarget	23 0  4   4 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1249 attlen			21 0  2   5 0 -1 -1 t p f s f f));
 DATA(insert OID = 0 ( 1249 attnum			21 0  2   6 0 -1 -1 t p f s f f));
-DATA(insert OID = 0 ( 1249 attnelems		23 0  4   7 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1249 attndims			23 0  4   7 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1249 attcacheoff		23 0  4   8 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1249 atttypmod		23 0  4   9 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1249 attbyval			16 0  1  10 0 -1 -1 t p f c f f));
@@ -426,36 +416,36 @@ DATA(insert OID = 0 ( 1249 tableoid			26 0  4  -7 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_class \
-{ 1259, {"relname"},	   19, 0, NAMEDATALEN,	1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltype"},	   26, 0,	4,	2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relowner"},	   23, 0,	4,	3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relam"},		   26, 0,	4,	4, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relfilenode"},   26, 0,	4,	5, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relpages"},	   23, 0,	4,	6, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltuples"},	   23, 0,	4,	7, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltoastrelid"}, 26, 0,	4,	8, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltoastidxid"}, 26, 0,	4,	9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relhasindex"},   16, 0,	1, 10, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relisshared"},   16, 0,	1, 11, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relkind"},	   18, 0,	1, 12, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relnatts"},	   21, 0,	2, 13, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relchecks"},	   21, 0,	2, 14, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"reltriggers"},   21, 0,	2, 15, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relukeys"},	   21, 0,	2, 16, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relfkeys"},	   21, 0,	2, 17, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relrefs"},	   21, 0,	2, 18, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relhaspkey"},    16, 0,	1, 19, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relhasrules"},   16, 0,	1, 20, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relhassubclass"},16, 0,	1, 21, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relacl"},		 1034, 0,  -1, 22, 0, -1, -1,	'\0', 'x', '\0', 'i', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1259 relname			19 0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
+{ 1259, {"relname"},	   19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,	1, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1259, {"reltype"},	   26, 0,	4,	2, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relowner"},	   23, 0,	4,	3, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relam"},		   26, 0,	4,	4, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relfilenode"},   26, 0,	4,	5, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relpages"},	   23, 0,	4,	6, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"reltuples"},	   700, 0,	4,	7, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1259, {"reltoastrelid"}, 26, 0,	4,	8, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"reltoastidxid"}, 26, 0,	4,	9, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relhasindex"},   16, 0,	1, 10, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relisshared"},   16, 0,	1, 11, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relkind"},	   18, 0,	1, 12, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relnatts"},	   21, 0,	2, 13, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relchecks"},	   21, 0,	2, 14, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"reltriggers"},   21, 0,	2, 15, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relukeys"},	   21, 0,	2, 16, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relfkeys"},	   21, 0,	2, 17, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relrefs"},	   21, 0,	2, 18, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relhaspkey"},    16, 0,	1, 19, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relhasrules"},   16, 0,	1, 20, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relhassubclass"},16, 0,	1, 21, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relacl"},		 1034, 0,  -1, 22, 0, -1, -1,	false, 'x', false, 'i', false, false }
+
+DATA(insert OID = 0 ( 1259 relname			19 DEFAULT_ATTSTATTARGET NAMEDATALEN   1 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1259 reltype			26 0  4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relowner			23 0  4   3 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relam			26 0  4   4 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relfilenode		26 0  4   5 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relpages			23 0  4   6 0 -1 -1 t p f i f f));
-DATA(insert OID = 0 ( 1259 reltuples		23 0  4   7 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1259 reltuples	   700 0  4   7 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1259 reltoastrelid	26 0  4   8 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 reltoastidxid	26 0  4   9 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relhasindex		16 0  1  10 0 -1 -1 t p f c f f));
@@ -544,7 +534,7 @@ DATA(insert OID = 0 ( 1219 tableoid			26 0  4  -7 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_variable \
-{ 1264, {"varfoo"},  26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }
+{ 1264, {"varfoo"},  26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false }
 
 DATA(insert OID = 0 ( 1264 varfoo			26 0  4   1 0 -1 -1 t p f i f f));
 
@@ -555,7 +545,7 @@ DATA(insert OID = 0 ( 1264 varfoo			26 0  4   1 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_log \
-{ 1269, {"logfoo"},  26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }
+{ 1269, {"logfoo"},  26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false }
 
 DATA(insert OID = 0 ( 1269 logfoo			26 0  4   1 0 -1 -1 t p f i f f));
 
@@ -566,7 +556,7 @@ DATA(insert OID = 0 ( 1269 logfoo			26 0  4   1 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_xactlock \
-{ 376, {"xactlockfoo"},  26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }
+{ 376, {"xactlockfoo"},  26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false }
 
 DATA(insert OID = 0 ( 376 xactlockfoo		26 0  4   1 0 -1 -1 t p f i f f));
 
diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h
index 81e75e14b6a..86de88cc9b6 100644
--- a/src/include/catalog/pg_class.h
+++ b/src/include/catalog/pg_class.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_class.h,v 1.47 2001/03/22 04:00:38 momjian Exp $
+ * $Id: pg_class.h,v 1.48 2001/05/07 00:43:25 tgl Exp $
  *
  * NOTES
  *	  the genbki.sh script reads this file and generates .bki
@@ -52,7 +52,7 @@ CATALOG(pg_class) BOOTSTRAP
 	Oid			relam;
 	Oid			relfilenode;
 	int4		relpages;
-	int4		reltuples;
+	float4		reltuples;
 	Oid			reltoastrelid;
 	Oid			reltoastidxid;
 	bool		relhasindex;
diff --git a/src/include/catalog/pg_statistic.h b/src/include/catalog/pg_statistic.h
index 2f39bea3245..8d6a6b37c16 100644
--- a/src/include/catalog/pg_statistic.h
+++ b/src/include/catalog/pg_statistic.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_statistic.h,v 1.10 2001/01/24 19:43:22 momjian Exp $
+ * $Id: pg_statistic.h,v 1.11 2001/05/07 00:43:25 tgl Exp $
  *
  * NOTES
  *	  the genbki.sh script reads this file and generates .bki
@@ -36,40 +36,91 @@ CATALOG(pg_statistic)
 	/* These fields form the unique key for the entry: */
 	Oid			starelid;		/* relation containing attribute */
 	int2		staattnum;		/* attribute (column) stats are for */
-	Oid			staop;			/* '<' comparison op used for lo/hi vals */
+
+	/* the fraction of the column's entries that are NULL: */
+	float4		stanullfrac;
 
 	/*
-	 * Note: the current VACUUM code will never produce more than one
-	 * entry per column, but in theory there could be multiple entries if
-	 * a datatype has more than one useful ordering operator.  Also, the
-	 * current code will not write an entry unless it found at least one
-	 * non-NULL value in the column; so the remaining fields will never be
-	 * NULL.
+	 * stawidth is the average width in bytes of non-null entries.  For
+	 * fixed-width datatypes this is of course the same as the typlen, but
+	 * for varlena types it is more useful.  Note that this is the average
+	 * width of the data as actually stored, post-TOASTing (eg, for a
+	 * moved-out-of-line value, only the size of the pointer object is
+	 * counted).  This is the appropriate definition for the primary use of
+	 * the statistic, which is to estimate sizes of in-memory hash tables of
+	 * tuples.
+	 */
+	int4		stawidth;
+
+	/* ----------------
+	 * stadistinct indicates the (approximate) number of distinct non-null
+	 * data values in the column.  The interpretation is:
+	 *		0		unknown or not computed
+	 *		> 0		actual number of distinct values
+	 *		< 0		negative of multiplier for number of rows
+	 * The special negative case allows us to cope with columns that are
+	 * unique (stadistinct = -1) or nearly so (for example, a column in
+	 * which values appear about twice on the average could be represented
+	 * by stadistinct = -0.5).  Because the number-of-rows statistic in
+	 * pg_class may be updated more frequently than pg_statistic is, it's
+	 * important to be able to describe such situations as a multiple of
+	 * the number of rows, rather than a fixed number of distinct values.
+	 * But in other cases a fixed number is correct (eg, a boolean column).
+	 * ----------------
+	 */
+	float4		stadistinct;
+
+	/* ----------------
+	 * To allow keeping statistics on different kinds of datatypes,
+	 * we do not hard-wire any particular meaning for the remaining
+	 * statistical fields.  Instead, we provide several "slots" in which
+	 * statistical data can be placed.  Each slot includes:
+	 *		kind			integer code identifying kind of data
+	 *		op				OID of associated operator, if needed
+	 *		numbers			float4 array (for statistical values)
+	 *		values			text array (for representations of data values)
+	 * The kind and operator fields are never NULL; they are zeroes in an
+	 * unused slot.  The numbers and values fields are NULL in an unused
+	 * slot, and might also be NULL in a used slot if the slot kind has
+	 * no need for one or the other.
+	 * ----------------
 	 */
 
+	int2		stakind1;
+	int2		stakind2;
+	int2		stakind3;
+	int2		stakind4;
+
+	Oid			staop1;
+	Oid			staop2;
+	Oid			staop3;
+	Oid			staop4;
+
 	/*
-	 * These fields contain the stats about the column indicated by the
-	 * key
+	 * THE REST OF THESE ARE VARIABLE LENGTH FIELDS, and may even be absent
+	 * (NULL). They cannot be accessed as C struct entries; you have to use
+	 * the full field access machinery (heap_getattr) for them.  We declare
+	 * them here for the catalog machinery.
 	 */
-	float4		stanullfrac;	/* the fraction of the entries that are
-								 * NULL */
-	float4		stacommonfrac;	/* the fraction that are the most common
-								 * val */
+
+	float4		stanumbers1[1];
+	float4		stanumbers2[1];
+	float4		stanumbers3[1];
+	float4		stanumbers4[1];
 
 	/*
-	 * THE REST OF THESE ARE VARIABLE LENGTH FIELDS. They cannot be
-	 * accessed as C struct entries; you have to use the full field access
-	 * machinery (heap_getattr) for them.
-	 *
-	 * All three of these are text representations of data values of the
-	 * column's data type.  To re-create the actual Datum, do
-	 * datatypein(textout(givenvalue)).
+	 * Values in these text arrays are external representations of values
+	 * of the column's data type.  To re-create the actual Datum, do
+	 * datatypein(textout(arrayelement)).
 	 */
-	text		stacommonval;	/* most common non-null value in column */
-	text		staloval;		/* smallest non-null value in column */
-	text		stahival;		/* largest non-null value in column */
+	text		stavalues1[1];
+	text		stavalues2[1];
+	text		stavalues3[1];
+	text		stavalues4[1];
 } FormData_pg_statistic;
 
+#define STATISTIC_NUM_SLOTS  4
+
 /* ----------------
  *		Form_pg_statistic corresponds to a pointer to a tuple with
  *		the format of pg_statistic relation.
@@ -81,14 +132,78 @@ typedef FormData_pg_statistic *Form_pg_statistic;
  *		compiler constants for pg_statistic
  * ----------------
  */
-#define Natts_pg_statistic				8
+#define Natts_pg_statistic				21
 #define Anum_pg_statistic_starelid		1
 #define Anum_pg_statistic_staattnum		2
-#define Anum_pg_statistic_staop			3
-#define Anum_pg_statistic_stanullfrac	4
-#define Anum_pg_statistic_stacommonfrac 5
-#define Anum_pg_statistic_stacommonval	6
-#define Anum_pg_statistic_staloval		7
-#define Anum_pg_statistic_stahival		8
+#define Anum_pg_statistic_stanullfrac	3
+#define Anum_pg_statistic_stawidth		4
+#define Anum_pg_statistic_stadistinct	5
+#define Anum_pg_statistic_stakind1		6
+#define Anum_pg_statistic_stakind2		7
+#define Anum_pg_statistic_stakind3		8
+#define Anum_pg_statistic_stakind4		9
+#define Anum_pg_statistic_staop1		10
+#define Anum_pg_statistic_staop2		11
+#define Anum_pg_statistic_staop3		12
+#define Anum_pg_statistic_staop4		13
+#define Anum_pg_statistic_stanumbers1	14
+#define Anum_pg_statistic_stanumbers2	15
+#define Anum_pg_statistic_stanumbers3	16
+#define Anum_pg_statistic_stanumbers4	17
+#define Anum_pg_statistic_stavalues1	18
+#define Anum_pg_statistic_stavalues2	19
+#define Anum_pg_statistic_stavalues3	20
+#define Anum_pg_statistic_stavalues4	21
+
+/*
+ * Currently, three statistical slot "kinds" are defined: most common values,
+ * histogram, and correlation.  Additional "kinds" will probably appear in
+ * future to help cope with non-scalar datatypes.
+ *
+ * Code reading the pg_statistic relation should not assume that a particular
+ * data "kind" will appear in any particular slot.  Instead, search the
+ * stakind fields to see if the desired data is available.
+ */
+
+/*
+ * In a "most common values" slot, staop is the OID of the "=" operator
+ * used to decide whether values are the same or not.  stavalues contains
+ * the K most common non-null values appearing in the column, and stanumbers
+ * contains their frequencies (fractions of total row count).  The values
+ * shall be ordered in decreasing frequency.  Note that since the arrays are
+ * variable-size, K may be chosen by the statistics collector.  Values should
+ * not appear in MCV unless they have been observed to occur more than once;
+ * a unique column will have no MCV slot.
+ */
+#define STATISTIC_KIND_MCV  1
+
+/*
+ * A "histogram" slot describes the distribution of scalar data.  staop is
+ * the OID of the "<" operator that describes the sort ordering.  (In theory,
+ * more than one histogram could appear, if a datatype has more than one
+ * useful sort operator.)  stavalues contains M (>=2) non-null values that
+ * divide the non-null column data values into M-1 bins of approximately equal
+ * population.  The first stavalues item is the MIN and the last is the MAX.
+ * stanumbers is not used and should be NULL.  IMPORTANT POINT: if an MCV
+ * slot is also provided, then the histogram describes the data distribution
+ * *after removing the values listed in MCV* (thus, it's a "compressed
+ * histogram" in the technical parlance).  This allows a more accurate
+ * representation of the distribution of a column with some very-common
+ * values.  In a column with only a few distinct values, it's possible that
+ * the MCV list describes the entire data population; in this case the
+ * histogram reduces to empty and should be omitted.
+ */
+#define STATISTIC_KIND_HISTOGRAM  2
+
+/*
+ * A "correlation" slot describes the correlation between the physical order
+ * of table tuples and the ordering of data values of this column, as seen
+ * by the "<" operator identified by staop.  (As with the histogram, more
+ * than one entry could theoretically appear.)  stavalues is not used and
+ * should be NULL.  stanumbers contains a single entry, the correlation
+ * coefficient between the sequence of data values and the sequence of
+ * their actual tuple positions.  The coefficient ranges from +1 to -1.
+ */
+#define STATISTIC_KIND_CORRELATION  3
 
 #endif	 /* PG_STATISTIC_H */
diff --git a/src/include/commands/command.h b/src/include/commands/command.h
index 8b108451d2a..7eb1a4fab84 100644
--- a/src/include/commands/command.h
+++ b/src/include/commands/command.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: command.h,v 1.26 2001/03/22 04:00:41 momjian Exp $
+ * $Id: command.h,v 1.27 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -43,9 +43,13 @@ extern void PortalCleanup(Portal portal);
 extern void AlterTableAddColumn(const char *relationName,
 					bool inh, ColumnDef *colDef);
 
-extern void AlterTableAlterColumn(const char *relationName,
-					  bool inh, const char *colName,
-					  Node *newDefault);
+extern void AlterTableAlterColumnDefault(const char *relationName,
+										 bool inh, const char *colName,
+										 Node *newDefault);
+
+extern void AlterTableAlterColumnStatistics(const char *relationName,
+											bool inh, const char *colName,
+											Node *statsTarget);
 
 extern void AlterTableDropColumn(const char *relationName,
 					 bool inh, const char *colName,
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index d82d22fcdfc..87bb0007aa0 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -1,129 +1,27 @@
 /*-------------------------------------------------------------------------
  *
  * vacuum.h
- *	  header file for postgres vacuum cleaner
+ *	  header file for postgres vacuum cleaner and statistics analyzer
  *
  *
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: vacuum.h,v 1.34 2001/03/22 04:00:43 momjian Exp $
+ * $Id: vacuum.h,v 1.35 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #ifndef VACUUM_H
 #define VACUUM_H
 
-#include "catalog/pg_attribute.h"
-#include "catalog/pg_index.h"
-#include "fmgr.h"
-#include "nodes/pg_list.h"
-#include "storage/itemptr.h"
+#include "nodes/parsenodes.h"
 
 
-typedef struct VAttListData
-{
-	int			val_dummy;
-	struct VAttListData *val_next;
-} VAttListData;
-
-typedef VAttListData *VAttList;
-
-typedef struct VacPageData
-{
-	BlockNumber blkno;			/* BlockNumber of this Page */
-	Size		free;			/* FreeSpace on this Page */
-	uint16		offsets_used;	/* Number of OffNums used by vacuum */
-	uint16		offsets_free;	/* Number of OffNums free or to be free */
-	OffsetNumber offsets[1];	/* Array of its OffNums */
-} VacPageData;
-
-typedef VacPageData *VacPage;
-
-typedef struct VacPageListData
-{
-	int			empty_end_pages;/* Number of "empty" end-pages */
-	int			num_pages;		/* Number of pages in pagedesc */
-	int			num_allocated_pages;	/* Number of allocated pages in
-										 * pagedesc */
-	VacPage    *pagedesc;		/* Descriptions of pages */
-} VacPageListData;
-
-typedef VacPageListData *VacPageList;
-
-typedef struct
-{
-	Form_pg_attribute attr;
-	Datum		best,
-				guess1,
-				guess2,
-				max,
-				min;
-	int			best_len,
-				guess1_len,
-				guess2_len,
-				max_len,
-				min_len;
-	long		best_cnt,
-				guess1_cnt,
-				guess1_hits,
-				guess2_hits,
-				null_cnt,
-				nonnull_cnt,
-				max_cnt,
-				min_cnt;
-	FmgrInfo	f_cmpeq,
-				f_cmplt,
-				f_cmpgt;
-	Oid			op_cmplt;
-	regproc		outfunc;
-	Oid			typelem;
-	bool		initialized;
-} VacAttrStats;
-
-typedef struct VRelListData
-{
-	Oid			vrl_relid;
-	struct VRelListData *vrl_next;
-} VRelListData;
-
-typedef VRelListData *VRelList;
-
-typedef struct VTupleLinkData
-{
-	ItemPointerData new_tid;
-	ItemPointerData this_tid;
-} VTupleLinkData;
-
-typedef VTupleLinkData *VTupleLink;
-
-typedef struct VTupleMoveData
-{
-	ItemPointerData tid;		/* tuple ID */
-	VacPage		vacpage;		/* where to move */
-	bool		cleanVpd;		/* clean vacpage before using */
-} VTupleMoveData;
-
-typedef VTupleMoveData *VTupleMove;
-
-typedef struct VRelStats
-{
-	Oid			relid;
-	int			num_tuples;
-	int			num_pages;
-	Size		min_tlen;
-	Size		max_tlen;
-	bool		hasindex;
-	int			num_vtlinks;
-	VTupleLink	vtlinks;
-} VRelStats;
-
-extern bool VacuumRunning;
-
-extern void vc_abort(void);
-extern void vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols);
-extern void analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL);
-
-#define ATTNVALS_SCALE	1000000000		/* XXX so it can act as a float4 */
+/* in commands/vacuum.c */
+extern void vacuum(VacuumStmt *vacstmt);
+extern void vac_update_relstats(Oid relid, long num_pages, double num_tuples,
+								bool hasindex);
+/* in commands/analyze.c */
+extern void analyze_rel(Oid relid, VacuumStmt *vacstmt);
 
 #endif	 /* VACUUM_H */
diff --git a/src/include/config.h.in b/src/include/config.h.in
index 0d989dbbb31..01593a4ce96 100644
--- a/src/include/config.h.in
+++ b/src/include/config.h.in
@@ -8,7 +8,7 @@
  * or in config.h afterwards.  Of course, if you edit config.h, then your
  * changes will be overwritten the next time you run configure.
  *
- * $Id: config.h.in,v 1.162 2001/04/14 22:55:02 petere Exp $
+ * $Id: config.h.in,v 1.163 2001/05/07 00:43:25 tgl Exp $
  */
 
 #ifndef CONFIG_H
@@ -157,6 +157,11 @@
 #define FUNC_MAX_ARGS		INDEX_MAX_KEYS
 
 /*
+ * System default value for pg_attribute.attstattarget
+ */
+#define DEFAULT_ATTSTATTARGET	10
+
+/*
  * Define this to make libpgtcl's "pg_result -assign" command process C-style
  * backslash sequences in returned tuple data and convert Postgres array
  * attributes into Tcl lists.  CAUTION: this conversion is *wrong* unless
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 2cf9378cf11..0967bef24ba 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: execnodes.h,v 1.57 2001/03/22 04:00:50 momjian Exp $
+ * $Id: execnodes.h,v 1.58 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -628,7 +628,6 @@ typedef struct GroupState
  *	 SortState information
  *
  *		sort_Done		indicates whether sort has been performed yet
- *		sort_Keys		scan key structures describing the sort keys
  *		tuplesortstate	private state of tuplesort.c
  * ----------------
  */
@@ -636,7 +635,6 @@ typedef struct SortState
 {
 	CommonScanState csstate;	/* its first field is NodeTag */
 	bool		sort_Done;
-	ScanKey		sort_Keys;
 	void	   *tuplesortstate;
 } SortState;
 
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 1614d787bcb..63b1b1046a8 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: parsenodes.h,v 1.126 2001/03/23 04:49:56 momjian Exp $
+ * $Id: parsenodes.h,v 1.127 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -118,11 +118,12 @@ typedef struct AlterTableStmt
 	NodeTag		type;
 	char		subtype;		/*------------
 								 * 	A = add column
-								 *	T = alter column
+								 *	T = alter column default
+								 *	S = alter column statistics
 								 *	D = drop column
 								 *	C = add constraint
 								 *	X = drop constraint
-								 *	E = add toast table,
+								 *	E = create toast table
 								 *	U = change owner
 								 *------------
 								 */
@@ -690,16 +691,20 @@ typedef struct ClusterStmt
 } ClusterStmt;
 
 /* ----------------------
- *		Vacuum Statement
+ *		Vacuum and Analyze Statements
+ *
+ * Even though these are nominally two statements, it's convenient to use
+ * just one node type for both.
  * ----------------------
  */
 typedef struct VacuumStmt
 {
 	NodeTag		type;
-	bool		verbose;		/* print status info */
-	bool		analyze;		/* analyze data */
-	char	   *vacrel;			/* table to vacuum */
-	List	   *va_spec;		/* columns to analyse */
+	bool		vacuum;			/* do VACUUM step */
+	bool		analyze;		/* do ANALYZE step */
+	bool		verbose;		/* print progress info */
+	char	   *vacrel;			/* name of single table to process, or NULL */
+	List	   *va_cols;		/* list of column names, or NIL for all */
 } VacuumStmt;
 
 /* ----------------------
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 3ae8e09f57a..9e69ed60992 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -10,7 +10,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: primnodes.h,v 1.53 2001/03/22 04:00:52 momjian Exp $
+ * $Id: primnodes.h,v 1.54 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -45,8 +45,8 @@ typedef struct FunctionCache *FunctionCachePtr;
  * reskey and reskeyop are the execution-time representation of sorting.
  * reskey must be zero in any non-sort-key item.  The reskey of sort key
  * targetlist items for a sort plan node is 1,2,...,n for the n sort keys.
- * The reskeyop of each such targetlist item is the sort operator's
- * regproc OID.  reskeyop will be zero in non-sort-key items.
+ * The reskeyop of each such targetlist item is the sort operator's OID.
+ * reskeyop will be zero in non-sort-key items.
  *
  * Both reskey and reskeyop are typically zero during parse/plan stages.
  * The executor does not pay any attention to ressortgroupref.
@@ -62,7 +62,7 @@ typedef struct Resdom
 	Index		ressortgroupref;
 	/* nonzero if referenced by a sort/group clause */
 	Index		reskey;			/* order of key in a sort (for those > 0) */
-	Oid			reskeyop;		/* sort operator's regproc Oid */
+	Oid			reskeyop;		/* sort operator's Oid */
 	bool		resjunk;		/* set to true to eliminate the attribute
 								 * from final target list */
 } Resdom;
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index f643ef87968..c76d9b4af71 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: relation.h,v 1.54 2001/03/22 04:00:53 momjian Exp $
+ * $Id: relation.h,v 1.55 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -512,8 +512,8 @@ typedef struct RestrictInfo
 	Oid			hashjoinoperator;		/* copy of clause operator */
 
 	/* cache space for hashclause processing; -1 if not yet set */
-	Selectivity left_dispersion;/* dispersion of left side */
-	Selectivity right_dispersion;		/* dispersion of right side */
+	Selectivity left_bucketsize;		/* avg bucketsize of left side */
+	Selectivity right_bucketsize;		/* avg bucketsize of right side */
 } RestrictInfo;
 
 /*
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 5caa576f0c0..cbf6df063a3 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: cost.h,v 1.38 2001/02/16 00:03:05 tgl Exp $
+ * $Id: cost.h,v 1.39 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -64,7 +64,8 @@ extern void cost_mergejoin(Path *path, Path *outer_path, Path *inner_path,
 			   List *restrictlist,
 			   List *outersortkeys, List *innersortkeys);
 extern void cost_hashjoin(Path *path, Path *outer_path, Path *inner_path,
-			  List *restrictlist, Selectivity innerdispersion);
+			  List *restrictlist, Selectivity innerbucketsize);
+extern Selectivity estimate_hash_bucketsize(Query *root, Var *var);
 extern Cost cost_qual_eval(List *quals);
 extern void set_baserel_size_estimates(Query *root, RelOptInfo *rel);
 extern void set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 5b71eded86f..0839feb4b2f 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pathnode.h,v 1.35 2001/03/22 04:00:54 momjian Exp $
+ * $Id: pathnode.h,v 1.36 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -59,7 +59,7 @@ extern HashPath *create_hashjoin_path(RelOptInfo *joinrel,
 					 Path *inner_path,
 					 List *restrict_clauses,
 					 List *hashclauses,
-					 Selectivity innerdispersion);
+					 Selectivity innerbucketsize);
 
 /*
  * prototypes for relnode.c
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h
index f1c4aff1c80..6b35deed286 100644
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: lsyscache.h,v 1.30 2001/03/22 04:01:13 momjian Exp $
+ * $Id: lsyscache.h,v 1.31 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -21,8 +21,6 @@ extern AttrNumber get_attnum(Oid relid, char *attname);
 extern Oid	get_atttype(Oid relid, AttrNumber attnum);
 extern bool get_attisset(Oid relid, char *attname);
 extern int32 get_atttypmod(Oid relid, AttrNumber attnum);
-extern double get_attdispersion(Oid relid, AttrNumber attnum,
-				  double min_estimate);
 extern RegProcedure get_opcode(Oid opno);
 extern char *get_opname(Oid opno);
 extern bool op_mergejoinable(Oid opno, Oid ltype, Oid rtype,
@@ -41,6 +39,14 @@ extern bool get_typbyval(Oid typid);
 extern void get_typlenbyval(Oid typid, int16 *typlen, bool *typbyval);
 extern char get_typstorage(Oid typid);
 extern Datum get_typdefault(Oid typid);
+extern bool get_attstatsslot(HeapTuple statstuple,
+							 Oid atttype, int32 atttypmod,
+							 int reqkind, Oid reqop,
+							 Datum **values, int *nvalues,
+							 float4 **numbers, int *nnumbers);
+extern void free_attstatsslot(Oid atttype,
+							  Datum *values, int nvalues,
+							  float4 *numbers, int nnumbers);
 
 #define TypeIsToastable(typid)	(get_typstorage(typid) != 'p')
 
diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h
index 8d4e2ae62c4..342f7bf8a56 100644
--- a/src/include/utils/syscache.h
+++ b/src/include/utils/syscache.h
@@ -9,7 +9,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: syscache.h,v 1.29 2001/03/22 04:01:14 momjian Exp $
+ * $Id: syscache.h,v 1.30 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,7 +53,7 @@
 #define RULEOID			22
 #define SHADOWNAME		23
 #define SHADOWSYSID		24
-#define STATRELID		25
+#define STATRELATT		25
 #define TYPENAME		26
 #define TYPEOID			27
 
diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h
index 7f273776c36..001761796e2 100644
--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -13,7 +13,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: tuplesort.h,v 1.6 2001/01/24 19:43:29 momjian Exp $
+ * $Id: tuplesort.h,v 1.7 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -36,8 +36,9 @@ typedef struct Tuplesortstate Tuplesortstate;
  */
 
 extern Tuplesortstate *tuplesort_begin_heap(TupleDesc tupDesc,
-					 int nkeys, ScanKey keys,
-					 bool randomAccess);
+					  int nkeys,
+					  Oid *sortOperators, AttrNumber *attNums,
+					  bool randomAccess);
 extern Tuplesortstate *tuplesort_begin_index(Relation indexRel,
 					  bool enforceUnique,
 					  bool randomAccess);
@@ -75,4 +76,19 @@ extern void tuplesort_rescan(Tuplesortstate *state);
 extern void tuplesort_markpos(Tuplesortstate *state);
 extern void tuplesort_restorepos(Tuplesortstate *state);
 
+/*
+ * SelectSortFunction (declared below) selects an appropriate sorting
+ * function to implement a sort operator as efficiently as possible.
+ */
+typedef enum
+{
+	SORTFUNC_LT,				/* raw "<" operator */
+	SORTFUNC_CMP,				/* -1 / 0 / 1 three-way comparator */
+	SORTFUNC_REVCMP				/* 1 / 0 / -1 (reversed) 3-way comparator */
+} SortFunctionKind;
+
+extern void SelectSortFunction(Oid sortOperator,
+							   RegProcedure *sortFunction,
+							   SortFunctionKind *kind);
+
 #endif	 /* TUPLESORT_H */
diff --git a/src/interfaces/ecpg/preproc/keywords.c b/src/interfaces/ecpg/preproc/keywords.c
index 5614a34b0fe..c03880f497d 100644
--- a/src/interfaces/ecpg/preproc/keywords.c
+++ b/src/interfaces/ecpg/preproc/keywords.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/keywords.c,v 1.39 2001/03/22 04:01:21 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/keywords.c,v 1.40 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -238,6 +238,7 @@ static ScanKeyword ScanKeywords[] = {
 	{"some", SOME},
 	{"start", START},
 	{"statement", STATEMENT},
+	{"statistics", STATISTICS},
 	{"stdin", STDIN},
 	{"stdout", STDOUT},
 	{"substring", SUBSTRING},
diff --git a/src/interfaces/ecpg/preproc/preproc.y b/src/interfaces/ecpg/preproc/preproc.y
index 345efb6576e..91708bd91fa 100644
--- a/src/interfaces/ecpg/preproc/preproc.y
+++ b/src/interfaces/ecpg/preproc/preproc.y
@@ -134,7 +134,7 @@ make_name(void)
 
 %union {
 	double                  dval;
-        int                     ival;
+	int                     ival;
 	char *                  str;
 	struct when             action;
 	struct index		index;
@@ -224,7 +224,7 @@ make_name(void)
 		NONE, NOTHING, NOTIFY, NOTNULL, OFFSET, OIDS,
 		OPERATOR, OWNER, PASSWORD, PROCEDURAL, REINDEX, RENAME, RESET,
 		RETURNS, ROW, RULE, SEQUENCE, SERIAL, SETOF, SHARE,
-		SHOW, START, STATEMENT, STDIN, STDOUT, SYSID TEMP,
+		SHOW, START, STATEMENT, STATISTICS, STDIN, STDOUT, SYSID TEMP,
 		TEMPLATE, TOAST, TRUNCATE, TRUSTED, UNLISTEN, UNTIL, VACUUM,
 		VALID, VERBOSE, VERSION
 
@@ -285,7 +285,7 @@ make_name(void)
 %type  <str>    file_name AexprConst ParamNo c_expr ConstTypename
 %type  <str>	in_expr_nodes a_expr b_expr TruncateStmt CommentStmt
 %type  <str> 	opt_indirection expr_list extract_list extract_arg
-%type  <str>	position_list substr_list substr_from alter_column_action
+%type  <str>	position_list substr_list substr_from alter_column_default
 %type  <str>	trim_list in_expr substr_for attr attrs drop_behavior
 %type  <str>	Typename SimpleTypename Generic Numeric generic opt_float opt_numeric
 %type  <str> 	opt_decimal Character character opt_varying opt_charset
@@ -293,7 +293,7 @@ make_name(void)
 %type  <str>	row_expr row_descriptor row_list ConstDatetime opt_chain
 %type  <str>	SelectStmt into_clause OptTemp ConstraintAttributeSpec
 %type  <str>	opt_table opt_all sort_clause sortby_list ConstraintAttr 
-%type  <str>	sortby OptUseOp opt_inh_star relation_name_list name_list
+%type  <str>	sortby OptUseOp relation_name_list name_list
 %type  <str>	group_clause having_clause from_clause opt_distinct
 %type  <str>	join_outer where_clause relation_expr sub_type opt_arg
 %type  <str>	opt_column_list insert_rest InsertStmt OptimizableStmt
@@ -301,8 +301,8 @@ make_name(void)
 %type  <str>    NotifyStmt columnElem copy_dirn UnlistenStmt copy_null
 %type  <str>    copy_delimiter ListenStmt CopyStmt copy_file_name opt_binary
 %type  <str>    opt_with_copy FetchStmt direction fetch_how_many from_in
-%type  <str>    ClosePortalStmt DropStmt VacuumStmt opt_verbose func_arg
-%type  <str>    opt_analyze opt_va_list va_list ExplainStmt index_params
+%type  <str>    ClosePortalStmt DropStmt VacuumStmt AnalyzeStmt opt_verbose func_arg
+%type  <str>    analyze_keyword opt_name_list ExplainStmt index_params
 %type  <str>    index_list func_index index_elem opt_class access_method_clause
 %type  <str>    index_opt_unique IndexStmt func_return ConstInterval
 %type  <str>    func_args_list func_args opt_with ProcedureStmt def_arg
@@ -329,7 +329,7 @@ make_name(void)
 %type  <str>	opt_cursor opt_lmode ConstraintsSetStmt comment_tg AllConst
 %type  <str>	case_expr when_clause_list case_default case_arg when_clause
 %type  <str>    select_clause opt_select_limit select_limit_value ConstraintTimeSpec
-%type  <str>    select_offset_value ReindexStmt join_type opt_only opt_boolean
+%type  <str>    select_offset_value ReindexStmt join_type opt_boolean
 %type  <str>	join_qual update_list AlterSchemaStmt joined_table
 %type  <str>	opt_level opt_lock lock_type users_in_new_group_clause
 %type  <str>    OptConstrFromTable comment_op OptTempTableName StringConst
@@ -447,6 +447,7 @@ stmt:  AlterSchemaStmt 			{ output_statement($1, 0, NULL, connection); }
 		| CreatedbStmt		{ output_statement($1, 0, NULL, connection); }
 		| DropdbStmt		{ output_statement($1, 0, NULL, connection); }
 		| VacuumStmt		{ output_statement($1, 0, NULL, connection); }
+		| AnalyzeStmt		{ output_statement($1, 0, NULL, connection); }
 		| VariableSetStmt	{ output_statement($1, 0, NULL, connection); }
 		| VariableShowStmt	{ output_statement($1, 0, NULL, connection); }
 		| VariableResetStmt	{ output_statement($1, 0, NULL, connection); }
@@ -909,39 +910,40 @@ CheckPointStmt: CHECKPOINT     { $$= make_str("checkpoint"); }
 
 /*****************************************************************************
  *
- *		QUERY :
- *
  *	ALTER TABLE variations
  *
  *****************************************************************************/
 
 AlterTableStmt:
-/* ALTER TABLE <name> ADD [COLUMN] <coldef> */
-        ALTER TABLE relation_name opt_inh_star ADD opt_column columnDef
+/* ALTER TABLE <relation> ADD [COLUMN] <coldef> */
+        ALTER TABLE relation_expr ADD opt_column columnDef
+		{
+			$$ = cat_str(5, make_str("alter table"), $3, make_str("add"), $5, $6);
+		}
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP DEFAULT} */
+	| ALTER TABLE relation_expr ALTER opt_column ColId alter_column_default
 		{
-			$$ = cat_str(6, make_str("alter table"), $3, $4, make_str("add"), $6, $7);
+			$$ = cat_str(6, make_str("alter table"), $3, make_str("alter"), $5, $6, $7);
 		}
-/* ALTER TABLE <name> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP
-DEFAULT} */
-	| ALTER TABLE relation_name opt_inh_star ALTER opt_column ColId
-		alter_column_action
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> SET STATISTICS <Iconst> */
+	| ALTER TABLE relation_expr ALTER opt_column ColId SET STATISTICS Iconst
 		{
-			$$ = cat_str(7, make_str("alter table"), $3, $4, make_str("alter"), $6, $7, $8);
+			$$ = cat_str(7, make_str("alter table"), $3, make_str("alter"), $5, $6, make_str("set statistics"), $9);
 		}
-/* ALTER TABLE <name> DROP [COLUMN] <name> {RESTRICT|CASCADE} */
-	| ALTER TABLE relation_name opt_inh_star DROP opt_column ColId drop_behavior
+/* ALTER TABLE <relation> DROP [COLUMN] <colname> {RESTRICT|CASCADE} */
+	| ALTER TABLE relation_expr DROP opt_column ColId drop_behavior
 		{
-			$$ = cat_str(7, make_str("alter table"), $3, $4, make_str("drop"), $6, $7, $8);
+			$$ = cat_str(6, make_str("alter table"), $3, make_str("drop"), $5, $6, $7);
 		}
-/* ALTER TABLE <name> ADD CONSTRAINT ... */
-	| ALTER TABLE relation_name opt_inh_star ADD TableConstraint
+/* ALTER TABLE <relation> ADD CONSTRAINT ... */
+	| ALTER TABLE relation_expr ADD TableConstraint
 		{
-			$$ = cat_str(5, make_str("alter table"), $3, $4, make_str("add"), $6);
+			$$ = cat_str(4, make_str("alter table"), $3, make_str("add"), $5);
 		}
-/* ALTER TABLE <name> DROP CONSTRAINT ... */
-	| ALTER TABLE relation_name opt_inh_star DROP CONSTRAINT name drop_behavior
+/* ALTER TABLE <relation> DROP CONSTRAINT ... */
+	| ALTER TABLE relation_expr DROP CONSTRAINT name drop_behavior
 		{
-			$$ = cat_str(6, make_str("alter table"), $3, $4, make_str("drop constraint"), $7, $8);
+			$$ = cat_str(5, make_str("alter table"), $3, make_str("drop constraint"), $6, $7);
 		}
 /* ALTER TABLE <name> OWNER TO UserId */     
 	| ALTER TABLE relation_name OWNER TO UserId   
@@ -950,7 +952,7 @@ DEFAULT} */
 		}
 		;
 
-alter_column_action:
+alter_column_default:
         SET DEFAULT a_expr	{ $$ = cat2_str(make_str("set default"), $3); }
         | DROP DEFAULT          { $$ = make_str("drop default"); }
         ;
@@ -1234,10 +1236,6 @@ key_reference:  NO ACTION	{ $$ = make_str("no action"); }
 		| SET NULL_P	{ $$ = make_str("set null"); }
 		;
 
-opt_only: ONLY   	{ $$ = make_str("only"); }
-	| /*EMPTY*/	{ $$ = EMPTY; }
-	;
-
 OptInherit:  INHERITS '(' relation_name_list ')'                { $$ = cat_str(3, make_str("inherits ("), $3, make_str(")")); }
                 | /*EMPTY*/					{ $$ = EMPTY; }
                 ;      
@@ -2013,10 +2011,9 @@ opt_force:      FORCE		{ $$ = make_str("force"); }
  *
  *****************************************************************************/
 
-RenameStmt:  ALTER TABLE relation_name opt_inh_star
-				  RENAME opt_column opt_name TO name
+RenameStmt:  ALTER TABLE relation_expr RENAME opt_column opt_name TO name
 				{
-					$$ = cat_str(8, make_str("alter table"), $3, $4, make_str("rename"), $6, $7, make_str("to"), $9);
+					$$ = cat_str(7, make_str("alter table"), $3, make_str("rename"), $5, $6, make_str("to"), $8);
 				}
 		;
 
@@ -2250,38 +2247,44 @@ ClusterStmt:  CLUSTER index_name ON relation_name
  *
  *		QUERY:
  *				vacuum
+ *				analyze
  *
  *****************************************************************************/
 
-VacuumStmt:  VACUUM opt_verbose opt_analyze
+VacuumStmt:  VACUUM opt_verbose
+				{
+					$$ = cat_str(2, make_str("vacuum"), $2);
+				}
+		| VACUUM opt_verbose relation_name
 				{
 					$$ = cat_str(3, make_str("vacuum"), $2, $3);
 				}
-		| VACUUM opt_verbose opt_analyze relation_name opt_va_list
+		| VACUUM opt_verbose AnalyzeStmt
 				{
-					if ( strlen($5) > 0 && strlen($4) == 0 )
-						mmerror(ET_ERROR, "VACUUM syntax error at or near \"(\"\n\tRelations name must be specified");
-					$$ = cat_str(5, make_str("vacuum"), $2, $3, $4, $5);
+					$$ = cat_str(3, make_str("vacuum"), $2, $3);
 				}
 		;
 
-opt_verbose:  VERBOSE					{ $$ = make_str("verbose"); }
-		| /*EMPTY*/				{ $$ = EMPTY; }
+AnalyzeStmt:  analyze_keyword opt_verbose
+				{
+					$$ = cat_str(2, $1, $2);
+				}
+		| analyze_keyword opt_verbose relation_name opt_name_list
+				{
+					$$ = cat_str(4, $1, $2, $3, $4);
+				}
 		;
 
-opt_analyze:  ANALYZE					{ $$ = make_str("analyze"); }
-		| ANALYSE				{ $$ = make_str("analyse"); }
-		| /*EMPTY*/				{ $$ = EMPTY; }
+analyze_keyword:  ANALYZE					{ $$ = make_str("analyze"); }
+		| ANALYSE							{ $$ = make_str("analyse"); }
 		;
 
-opt_va_list:  '(' va_list ')'				{ $$ = cat_str(3, make_str("("), $2, make_str(")")); }
+opt_verbose:  VERBOSE					{ $$ = make_str("verbose"); }
 		| /*EMPTY*/				{ $$ = EMPTY; }
 		;
 
-va_list:  name
-				{ $$=$1; }
-		| va_list ',' name
-				{ $$=cat_str(3, $1, make_str(","), $3); }
+opt_name_list:  '(' name_list ')'		{ $$ = cat_str(3, make_str("("), $2, make_str(")")); }
+		| /*EMPTY*/				{ $$ = EMPTY; }
 		;
 
 
@@ -2383,9 +2386,9 @@ columnElem:  ColId opt_indirection
  *
  *****************************************************************************/
 
-DeleteStmt:  DELETE FROM opt_only name where_clause
+DeleteStmt:  DELETE FROM relation_expr where_clause
 				{
-					$$ = cat_str(4, make_str("delete from"), $3, $4, $5);
+					$$ = cat_str(3, make_str("delete from"), $3, $4);
 				}
 		;
 
@@ -2416,12 +2419,12 @@ opt_lmode:      SHARE                           { $$ = make_str("share"); }
  *
  *****************************************************************************/
 
-UpdateStmt:  UPDATE opt_only relation_name
+UpdateStmt:  UPDATE relation_expr
 			  SET update_target_list
 			  from_clause
 			  where_clause
 				{
-					$$ = cat_str(7, make_str("update"), $2, $3, make_str("set"), $5, $6, $7);
+					$$ = cat_str(6, make_str("update"), $2, make_str("set"), $4, $5, $6);
 				}
 		;
 
@@ -2667,10 +2670,6 @@ select_offset_value:  	PosIntConst	{
  *	...however, recursive addattr and rename supported.  make special
  *	cases for these.
  */
-opt_inh_star:  '*'					{ $$ = make_str("*"); }
-		| /*EMPTY*/				{ $$ = EMPTY; }
-		;
-
 relation_name_list:  name_list { $$ = $1; };
 
 name_list:  name
@@ -2704,7 +2703,7 @@ opt_for_update_clause: for_update_clause                { $$ = $1; }
 		| /* EMPTY */				{ $$ = EMPTY; }
                 ;
 
-update_list:  OF va_list
+update_list:  OF name_list
               {
 			$$ = cat2_str(make_str("of"), $2);
 	      }
@@ -5028,6 +5027,7 @@ TokenId:  ABSOLUTE			{ $$ = make_str("absolute"); }
 	| SHARE				{ $$ = make_str("share"); }
 	| START				{ $$ = make_str("start"); }
 	| STATEMENT			{ $$ = make_str("statement"); }
+	| STATISTICS		{ $$ = make_str("statistics"); }
 	| STDIN                         { $$ = make_str("stdin"); }
 	| STDOUT                        { $$ = make_str("stdout"); }
 	| SYSID                         { $$ = make_str("sysid"); }
diff --git a/src/test/regress/expected/oidjoins.out b/src/test/regress/expected/oidjoins.out
index a2b0ad9e3e7..46bc60f6955 100644
--- a/src/test/regress/expected/oidjoins.out
+++ b/src/test/regress/expected/oidjoins.out
@@ -353,12 +353,28 @@ WHERE	pg_statistic.starelid != 0 AND
 -----+----------
 (0 rows)
 
-SELECT	oid, pg_statistic.staop 
+SELECT	oid, pg_statistic.staop1 
 FROM	pg_statistic 
-WHERE	pg_statistic.staop != 0 AND 
-	NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop);
- oid | staop 
------+-------
+WHERE	pg_statistic.staop1 != 0 AND 
+	NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop1);
+ oid | staop1 
+-----+--------
+(0 rows)
+
+SELECT	oid, pg_statistic.staop2 
+FROM	pg_statistic 
+WHERE	pg_statistic.staop2 != 0 AND 
+	NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop2);
+ oid | staop2 
+-----+--------
+(0 rows)
+
+SELECT	oid, pg_statistic.staop3 
+FROM	pg_statistic 
+WHERE	pg_statistic.staop3 != 0 AND 
+	NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop3);
+ oid | staop3 
+-----+--------
 (0 rows)
 
 SELECT	oid, pg_trigger.tgrelid 
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 9d4ff1b3985..1b094a6e3bf 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -482,8 +482,8 @@ WHERE p1.aggtransfn = p2.oid AND
           (p2.pronargs = 1 AND p1.aggbasetype = 0)));
   oid  | aggname | oid |   proname   
 -------+---------+-----+-------------
- 16997 | max     | 768 | int4larger
- 17011 | min     | 769 | int4smaller
+ 17010 | max     | 768 | int4larger
+ 17024 | min     | 769 | int4smaller
 (2 rows)
 
 -- Cross-check finalfn (if present) against its entry in pg_proc.
diff --git a/src/test/regress/sql/oidjoins.sql b/src/test/regress/sql/oidjoins.sql
index b7ea1f63eaa..88727a6c76e 100644
--- a/src/test/regress/sql/oidjoins.sql
+++ b/src/test/regress/sql/oidjoins.sql
@@ -177,10 +177,18 @@ SELECT	oid, pg_statistic.starelid
 FROM	pg_statistic 
 WHERE	pg_statistic.starelid != 0 AND 
 	NOT EXISTS(SELECT * FROM pg_class AS t1 WHERE t1.oid = pg_statistic.starelid);
-SELECT	oid, pg_statistic.staop 
+SELECT	oid, pg_statistic.staop1 
 FROM	pg_statistic 
-WHERE	pg_statistic.staop != 0 AND 
-	NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop);
+WHERE	pg_statistic.staop1 != 0 AND 
+	NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop1);
+SELECT	oid, pg_statistic.staop2 
+FROM	pg_statistic 
+WHERE	pg_statistic.staop2 != 0 AND 
+	NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop2);
+SELECT	oid, pg_statistic.staop3 
+FROM	pg_statistic 
+WHERE	pg_statistic.staop3 != 0 AND 
+	NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop3);
 SELECT	oid, pg_trigger.tgrelid 
 FROM	pg_trigger 
 WHERE	pg_trigger.tgrelid != 0 AND