diff options
Diffstat (limited to 'src')
58 files changed, 3606 insertions, 1889 deletions
diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c index 769f754b669..86d704e8d08 100644 --- a/src/backend/access/common/tupdesc.c +++ b/src/backend/access/common/tupdesc.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.73 2001/03/22 06:16:06 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.74 2001/05/07 00:43:15 tgl Exp $ * * NOTES * some of the executor utility code such as "ExecTypeFromTL" should be @@ -237,16 +237,16 @@ equalTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2) Form_pg_attribute attr2 = tupdesc2->attrs[i]; /* - * We do not need to check every single field here, and in fact - * some fields such as attdispersion probably shouldn't be - * compared. We can also disregard attnum (it was used to place - * the row in the attrs array) and everything derived from the - * column datatype. + * We do not need to check every single field here: we can disregard + * attrelid, attnum (it was used to place the row in the attrs array) + * and everything derived from the column datatype. */ if (strcmp(NameStr(attr1->attname), NameStr(attr2->attname)) != 0) return false; if (attr1->atttypid != attr2->atttypid) return false; + if (attr1->attstattarget != attr2->attstattarget) + return false; if (attr1->atttypmod != attr2->atttypmod) return false; if (attr1->attstorage != attr2->attstorage) @@ -365,12 +365,12 @@ TupleDescInitEntry(TupleDesc desc, else MemSet(NameStr(att->attname), 0, NAMEDATALEN); - att->attdispersion = 0; /* dummy value */ + att->attstattarget = 0; att->attcacheoff = -1; att->atttypmod = typmod; att->attnum = attributeNumber; - att->attnelems = attdim; + att->attndims = attdim; att->attisset = attisset; att->attnotnull = false; @@ -506,7 +506,7 @@ TupleDescMakeSelfReference(TupleDesc desc, att->attbyval = true; att->attalign = 'i'; att->attstorage = 'p'; - att->attnelems = 0; + att->attndims = 0; } /* ---------------------------------------------------------------- diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 1c5577b88a0..06010896821 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -6,7 +6,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.72 2001/03/22 03:59:12 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.73 2001/05/07 00:43:15 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -84,8 +84,8 @@ static void gist_dumptree(Relation r, int level, BlockNumber blk, OffsetNumber c #endif /* -** routine to build an index. Basically calls insert over and over -*/ + * routine to build an index. Basically calls insert over and over + */ Datum gistbuild(PG_FUNCTION_ARGS) { @@ -105,7 +105,7 @@ gistbuild(PG_FUNCTION_ARGS) itupdesc; Datum attdata[INDEX_MAX_KEYS]; char nulls[INDEX_MAX_KEYS]; - int nhtups, + double nhtups, nitups; Node *pred = indexInfo->ii_Predicate; @@ -172,7 +172,7 @@ gistbuild(PG_FUNCTION_ARGS) #endif /* OMIT_PARTIAL_INDEX */ /* build the index */ - nhtups = nitups = 0; + nhtups = nitups = 0.0; compvec = (bool *) palloc(sizeof(bool) * indexInfo->ii_NumIndexAttrs); @@ -183,7 +183,7 @@ gistbuild(PG_FUNCTION_ARGS) { MemoryContextReset(econtext->ecxt_per_tuple_memory); - nhtups++; + nhtups += 1.0; #ifndef OMIT_PARTIAL_INDEX @@ -196,7 +196,7 @@ gistbuild(PG_FUNCTION_ARGS) slot->val = htup; if (ExecQual((List *) oldPred, econtext, false)) { - nitups++; + nitups += 1.0; continue; } } @@ -213,7 +213,7 @@ gistbuild(PG_FUNCTION_ARGS) } #endif /* OMIT_PARTIAL_INDEX */ - nitups++; + nitups += 1.0; /* * For the current heap tuple, extract all the attributes we use diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index aa76ba232a0..9617fcc33a6 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.50 2001/03/22 03:59:12 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.51 2001/05/07 00:43:15 tgl Exp $ * * NOTES * This file contains only the public interface routines. @@ -57,7 +57,7 @@ hashbuild(PG_FUNCTION_ARGS) itupdesc; Datum attdata[INDEX_MAX_KEYS]; char nulls[INDEX_MAX_KEYS]; - int nhtups, + double nhtups, nitups; HashItem hitem; Node *pred = indexInfo->ii_Predicate; @@ -109,7 +109,7 @@ hashbuild(PG_FUNCTION_ARGS) #endif /* OMIT_PARTIAL_INDEX */ /* build the index */ - nhtups = nitups = 0; + nhtups = nitups = 0.0; /* start a heap scan */ hscan = heap_beginscan(heap, 0, SnapshotNow, 0, (ScanKey) NULL); @@ -118,7 +118,7 @@ hashbuild(PG_FUNCTION_ARGS) { MemoryContextReset(econtext->ecxt_per_tuple_memory); - nhtups++; + nhtups += 1.0; #ifndef OMIT_PARTIAL_INDEX @@ -131,7 +131,7 @@ hashbuild(PG_FUNCTION_ARGS) slot->val = htup; if (ExecQual((List *) oldPred, econtext, false)) { - nitups++; + nitups += 1.0; continue; } } @@ -148,7 +148,7 @@ hashbuild(PG_FUNCTION_ARGS) } #endif /* OMIT_PARTIAL_INDEX */ - nitups++; + nitups += 1.0; /* * For the current heap tuple, extract all the attributes we use diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c index fb509ab66de..2a9df577b10 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.21 2001/03/25 00:45:20 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.22 2001/05/07 00:43:15 tgl Exp $ * * * INTERFACE ROUTINES @@ -167,6 +167,43 @@ heap_tuple_untoast_attr(varattrib *attr) /* ---------- + * toast_raw_datum_size - + * + * Return the raw (detoasted) size of a varlena datum + * ---------- + */ +Size +toast_raw_datum_size(Datum value) +{ + varattrib *attr = (varattrib *) DatumGetPointer(value); + Size result; + + if (VARATT_IS_COMPRESSED(attr)) + { + /* + * va_rawsize shows the original data size, whether the datum + * is external or not. + */ + result = attr->va_content.va_compressed.va_rawsize + VARHDRSZ; + } + else if (VARATT_IS_EXTERNAL(attr)) + { + /* + * an uncompressed external attribute has rawsize including the + * header (not too consistent!) + */ + result = attr->va_content.va_external.va_rawsize; + } + else + { + /* plain untoasted datum */ + result = VARSIZE(attr); + } + return result; +} + + +/* ---------- * toast_delete - * * Cascaded delete toast-entries on DELETE diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 97d99da4fde..f456e0c9306 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -12,7 +12,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.79 2001/03/22 03:59:15 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.80 2001/05/07 00:43:16 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -69,7 +69,7 @@ btbuild(PG_FUNCTION_ARGS) itupdesc; Datum attdata[INDEX_MAX_KEYS]; char nulls[INDEX_MAX_KEYS]; - int nhtups, + double nhtups, nitups; Node *pred = indexInfo->ii_Predicate; @@ -156,7 +156,7 @@ btbuild(PG_FUNCTION_ARGS) #endif /* OMIT_PARTIAL_INDEX */ /* build the index */ - nhtups = nitups = 0; + nhtups = nitups = 0.0; if (usefast) { @@ -196,7 +196,7 @@ btbuild(PG_FUNCTION_ARGS) MemoryContextReset(econtext->ecxt_per_tuple_memory); - nhtups++; + nhtups += 1.0; #ifndef OMIT_PARTIAL_INDEX @@ -209,7 +209,7 @@ btbuild(PG_FUNCTION_ARGS) slot->val = htup; if (ExecQual((List *) oldPred, econtext, false)) { - nitups++; + nitups += 1.0; continue; } } @@ -226,7 +226,7 @@ btbuild(PG_FUNCTION_ARGS) } #endif /* OMIT_PARTIAL_INDEX */ - nitups++; + nitups += 1.0; /* * For the current heap tuple, extract all the attributes we use diff --git a/src/backend/access/rtree/rtree.c b/src/backend/access/rtree/rtree.c index 3752a59e99a..a8c6a13ea3c 100644 --- a/src/backend/access/rtree/rtree.c +++ b/src/backend/access/rtree/rtree.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.61 2001/03/22 03:59:16 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.62 2001/05/07 00:43:16 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -100,7 +100,7 @@ rtbuild(PG_FUNCTION_ARGS) itupdesc; Datum attdata[INDEX_MAX_KEYS]; char nulls[INDEX_MAX_KEYS]; - int nhtups, + double nhtups, nitups; Node *pred = indexInfo->ii_Predicate; @@ -163,7 +163,7 @@ rtbuild(PG_FUNCTION_ARGS) #endif /* OMIT_PARTIAL_INDEX */ /* count the tuples as we insert them */ - nhtups = nitups = 0; + nhtups = nitups = 0.0; /* start a heap scan */ hscan = heap_beginscan(heap, 0, SnapshotNow, 0, (ScanKey) NULL); @@ -172,7 +172,7 @@ rtbuild(PG_FUNCTION_ARGS) { MemoryContextReset(econtext->ecxt_per_tuple_memory); - nhtups++; + nhtups += 1.0; #ifndef OMIT_PARTIAL_INDEX @@ -185,7 +185,7 @@ rtbuild(PG_FUNCTION_ARGS) slot->val = htup; if (ExecQual((List *) oldPred, econtext, false)) { - nitups++; + nitups += 1.0; continue; } } @@ -202,7 +202,7 @@ rtbuild(PG_FUNCTION_ARGS) } #endif /* OMIT_PARTIAL_INDEX */ - nitups++; + nitups += 1.0; /* * For the current heap tuple, extract all the attributes we use diff --git a/src/backend/catalog/genbki.sh b/src/backend/catalog/genbki.sh index c2993fa8fc6..cac53f3e085 100644 --- a/src/backend/catalog/genbki.sh +++ b/src/backend/catalog/genbki.sh @@ -10,7 +10,7 @@ # # # IDENTIFICATION -# $Header: /cvsroot/pgsql/src/backend/catalog/Attic/genbki.sh,v 1.19 2001/01/16 22:48:34 tgl Exp $ +# $Header: /cvsroot/pgsql/src/backend/catalog/Attic/genbki.sh,v 1.20 2001/05/07 00:43:16 tgl Exp $ # # NOTES # non-essential whitespace is removed from the generated file. @@ -126,10 +126,12 @@ for dir in $INCLUDE_DIRS; do fi done -# Get INDEX_MAX_KEYS from config.h (who needs consistency?) +# Get INDEX_MAX_KEYS and DEFAULT_ATTSTATTARGET from config.h +# (who needs consistency?) for dir in $INCLUDE_DIRS; do if [ -f "$dir/config.h" ]; then INDEXMAXKEYS=`grep '#define[ ]*INDEX_MAX_KEYS' $dir/config.h | $AWK '{ print $3 }'` + DEFAULTATTSTATTARGET=`grep '#define[ ]*DEFAULT_ATTSTATTARGET' $dir/config.h | $AWK '{ print $3 }'` break fi done @@ -168,6 +170,7 @@ sed -e "s/;[ ]*$//g" \ -e "s/(NameData/(name/g" \ -e "s/(Oid/(oid/g" \ -e "s/NAMEDATALEN/$NAMEDATALEN/g" \ + -e "s/DEFAULT_ATTSTATTARGET/$DEFAULTATTSTATTARGET/g" \ -e "s/INDEX_MAX_KEYS\*2/$INDEXMAXKEYS2/g" \ -e "s/INDEX_MAX_KEYS\*4/$INDEXMAXKEYS4/g" \ -e "s/INDEX_MAX_KEYS/$INDEXMAXKEYS/g" \ diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 54867d51a4b..03f16e11c3f 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.162 2001/03/22 06:16:10 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.163 2001/05/07 00:43:17 tgl Exp $ * * * INTERFACE ROUTINES @@ -96,54 +96,72 @@ static void RemoveStatistics(Relation rel); /* * Note: - * Should the executor special case these attributes in the future? - * Advantage: consume 1/2 the space in the ATTRIBUTE relation. - * Disadvantage: having rules to compute values in these tuples may - * be more difficult if not impossible. + * Should the system special case these attributes in the future? + * Advantage: consume much less space in the ATTRIBUTE relation. + * Disadvantage: special cases will be all over the place. */ static FormData_pg_attribute a1 = { - 0xffffffff, {"ctid"}, TIDOID, 0, sizeof(ItemPointerData), - SelfItemPointerAttributeNumber, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' + 0, {"ctid"}, TIDOID, 0, sizeof(ItemPointerData), + SelfItemPointerAttributeNumber, 0, -1, -1, + false, 'p', false, 'i', false, false }; static FormData_pg_attribute a2 = { - 0xffffffff, {"oid"}, OIDOID, 0, sizeof(Oid), - ObjectIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' + 0, {"oid"}, OIDOID, 0, sizeof(Oid), + ObjectIdAttributeNumber, 0, -1, -1, + true, 'p', false, 'i', false, false }; static FormData_pg_attribute a3 = { - 0xffffffff, {"xmin"}, XIDOID, 0, sizeof(TransactionId), - MinTransactionIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' + 0, {"xmin"}, XIDOID, 0, sizeof(TransactionId), + MinTransactionIdAttributeNumber, 0, -1, -1, + true, 'p', false, 'i', false, false }; static FormData_pg_attribute a4 = { - 0xffffffff, {"cmin"}, CIDOID, 0, sizeof(CommandId), - MinCommandIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' + 0, {"cmin"}, CIDOID, 0, sizeof(CommandId), + MinCommandIdAttributeNumber, 0, -1, -1, + true, 'p', false, 'i', false, false }; static FormData_pg_attribute a5 = { - 0xffffffff, {"xmax"}, XIDOID, 0, sizeof(TransactionId), - MaxTransactionIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' + 0, {"xmax"}, XIDOID, 0, sizeof(TransactionId), + MaxTransactionIdAttributeNumber, 0, -1, -1, + true, 'p', false, 'i', false, false }; static FormData_pg_attribute a6 = { - 0xffffffff, {"cmax"}, CIDOID, 0, sizeof(CommandId), - MaxCommandIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' + 0, {"cmax"}, CIDOID, 0, sizeof(CommandId), + MaxCommandIdAttributeNumber, 0, -1, -1, + true, 'p', false, 'i', false, false }; /* - We decide to call this attribute "tableoid" rather than say -"classoid" on the basis that in the future there may be more than one -table of a particular class/type. In any case table is still the word -used in SQL. -*/ + * We decided to call this attribute "tableoid" rather than say + * "classoid" on the basis that in the future there may be more than one + * table of a particular class/type. In any case table is still the word + * used in SQL. + */ static FormData_pg_attribute a7 = { - 0xffffffff, {"tableoid"}, OIDOID, 0, sizeof(Oid), - TableOidAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' + 0, {"tableoid"}, OIDOID, 0, sizeof(Oid), + TableOidAttributeNumber, 0, -1, -1, + true, 'p', false, 'i', false, false }; -static Form_pg_attribute HeapAtt[] = {&a1, &a2, &a3, &a4, &a5, &a6, &a7}; +static Form_pg_attribute SysAtt[] = {&a1, &a2, &a3, &a4, &a5, &a6, &a7}; + +/* + * This function returns a Form_pg_attribute pointer for a system attribute. + */ +Form_pg_attribute +SystemAttributeDefinition(AttrNumber attno) +{ + if (attno >= 0 || attno < - (int) lengthof(SysAtt)) + elog(ERROR, "SystemAttributeDefinition: invalid attribute number %d", + attno); + return SysAtt[-attno - 1]; +} /* ---------------------------------------------------------------- * XXX END OF UGLY HARD CODED BADNESS XXX @@ -380,32 +398,6 @@ heap_storage_create(Relation rel) * 8) the relations are closed and the new relation's oid * is returned. * - * old comments: - * A new relation is inserted into the RELATION relation - * with the specified attribute(s) (newly inserted into - * the ATTRIBUTE relation). How does concurrency control - * work? Is it automatic now? Expects the caller to have - * attname, atttypid, atttyparg, attproc, and attlen domains filled. - * Create fills the attnum domains sequentually from zero, - * fills the attdispersion domains with zeros, and fills the - * attrelid fields with the relid. - * - * scan relation catalog for name conflict - * scan type catalog for typids (if not arg) - * create and insert attribute(s) into attribute catalog - * create new relation - * insert new relation into attribute catalog - * - * Should coordinate with heap_create_with_catalog(). Either - * it should not be called or there should be a way to prevent - * the relation from being removed at the end of the - * transaction if it is successful ('u'/'r' may be enough). - * Also, if the transaction does not commit, then the - * relation should be removed. - * - * XXX amcreate ignores "off" when inserting (for now). - * XXX amcreate (like the other utilities) needs to understand indexes. - * * ---------------------------------------------------------------- */ @@ -432,14 +424,14 @@ CheckAttributeNames(TupleDesc tupdesc) */ for (i = 0; i < natts; i++) { - for (j = 0; j < (int) (sizeof(HeapAtt) / sizeof(HeapAtt[0])); j++) + for (j = 0; j < (int) lengthof(SysAtt); j++) { - if (strcmp(NameStr(HeapAtt[j]->attname), + if (strcmp(NameStr(SysAtt[j]->attname), NameStr(tupdesc->attrs[i]->attname)) == 0) { elog(ERROR, "Attribute '%s' has a name conflict" "\n\tName matches an existing system attribute", - NameStr(HeapAtt[j]->attname)); + NameStr(SysAtt[j]->attname)); } } if (tupdesc->attrs[i]->atttypid == UNKNOWNOID) @@ -574,7 +566,7 @@ AddNewAttributeTuples(Oid new_rel_oid, /* Fill in the correct relation OID */ (*dpp)->attrelid = new_rel_oid; /* Make sure these are OK, too */ - (*dpp)->attdispersion = 0; + (*dpp)->attstattarget = DEFAULT_ATTSTATTARGET; (*dpp)->attcacheoff = -1; tup = heap_addheader(Natts_pg_attribute, @@ -593,14 +585,14 @@ AddNewAttributeTuples(Oid new_rel_oid, /* * next we add the system attributes.. */ - dpp = HeapAtt; + dpp = SysAtt; for (i = 0; i < -1 - FirstLowInvalidHeapAttributeNumber; i++) { /* Fill in the correct relation OID */ /* HACK: we are writing on static data here */ (*dpp)->attrelid = new_rel_oid; /* Unneeded since they should be OK in the constant data anyway */ - /* (*dpp)->attdispersion = 0; */ + /* (*dpp)->attstattarget = 0; */ /* (*dpp)->attcacheoff = -1; */ tup = heap_addheader(Natts_pg_attribute, @@ -669,8 +661,23 @@ AddNewRelationTuple(Relation pg_class_desc, * save. (NOTE: CREATE INDEX inserts the same bogus estimates if it * finds the relation has 0 rows and pages. See index.c.) */ - new_rel_reltup->relpages = 10; /* bogus estimates */ - new_rel_reltup->reltuples = 1000; + switch (relkind) + { + case RELKIND_RELATION: + case RELKIND_INDEX: + case RELKIND_TOASTVALUE: + new_rel_reltup->relpages = 10; /* bogus estimates */ + new_rel_reltup->reltuples = 1000; + break; + case RELKIND_SEQUENCE: + new_rel_reltup->relpages = 1; + new_rel_reltup->reltuples = 1; + break; + default: /* views, etc */ + new_rel_reltup->relpages = 0; + new_rel_reltup->reltuples = 0; + break; + } new_rel_reltup->relowner = GetUserId(); new_rel_reltup->reltype = new_type_oid; diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 2adb30e1ed8..5eefab11489 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.145 2001/04/02 14:34:25 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.146 2001/05/07 00:43:17 tgl Exp $ * * * INTERFACE ROUTINES @@ -55,7 +55,7 @@ */ #define AVG_ATTR_SIZE 8 #define NTUPLES_PER_PAGE(natts) \ - ((BLCKSZ - MAXALIGN(sizeof (PageHeaderData))) / \ + ((BLCKSZ - MAXALIGN(sizeof(PageHeaderData))) / \ ((natts) * AVG_ATTR_SIZE + MAXALIGN(sizeof(HeapTupleHeaderData)))) /* non-export function prototypes */ @@ -99,39 +99,6 @@ IsReindexProcessing(void) } /* ---------------------------------------------------------------- - * sysatts is a structure containing attribute tuple forms - * for system attributes (numbered -1, -2, ...). This really - * should be generated or eliminated or moved elsewhere. -cim 1/19/91 - * - * typedef struct FormData_pg_attribute { - * Oid attrelid; - * NameData attname; - * Oid atttypid; - * uint32 attnvals; - * int16 attlen; - * AttrNumber attnum; - * uint32 attnelems; - * int32 attcacheoff; - * int32 atttypmod; - * bool attbyval; - * bool attisset; - * char attalign; - * bool attnotnull; - * bool atthasdef; - * } FormData_pg_attribute; - * - * ---------------------------------------------------------------- - */ -static FormData_pg_attribute sysatts[] = { - {0, {"ctid"}, TIDOID, 0, 6, -1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0'}, - {0, {"oid"}, OIDOID, 0, 4, -2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'}, - {0, {"xmin"}, XIDOID, 0, 4, -3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'}, - {0, {"cmin"}, CIDOID, 0, 4, -4, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'}, - {0, {"xmax"}, XIDOID, 0, 4, -5, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'}, - {0, {"cmax"}, CIDOID, 0, 4, -6, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'}, -}; - -/* ---------------------------------------------------------------- * GetHeapRelationOid * ---------------------------------------------------------------- */ @@ -250,7 +217,6 @@ ConstructTupleDescriptor(Relation heapRelation, for (i = 0; i < numatts; i++) { AttrNumber atnum; /* attributeNumber[attributeOffset] */ - AttrNumber atind; Form_pg_attribute from; Form_pg_attribute to; @@ -264,16 +230,9 @@ ConstructTupleDescriptor(Relation heapRelation, { /* - * here we are indexing on a system attribute (-1...-n) so we - * convert atnum into a usable index 0...n-1 so we can use it - * to dereference the array sysatts[] which stores tuple - * descriptor information for system attributes. + * here we are indexing on a system attribute (-1...-n) */ - if (atnum <= FirstLowInvalidHeapAttributeNumber || atnum >= 0) - elog(ERROR, "Cannot create index on system attribute: attribute number out of range (%d)", atnum); - atind = (-atnum) - 1; - - from = &sysatts[atind]; + from = SystemAttributeDefinition(atnum); } else { @@ -284,9 +243,8 @@ ConstructTupleDescriptor(Relation heapRelation, if (atnum > natts) elog(ERROR, "Cannot create index: attribute %d does not exist", atnum); - atind = AttrNumberGetAttrOffset(atnum); - from = heapTupDesc->attrs[atind]; + from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)]; } /* @@ -303,10 +261,10 @@ ConstructTupleDescriptor(Relation heapRelation, */ to->attnum = i + 1; - to->attdispersion = 0.0; + to->attstattarget = 0; + to->attcacheoff = -1; to->attnotnull = false; to->atthasdef = false; - to->attcacheoff = -1; /* * We do not yet have the correct relation OID for the index, so @@ -1542,10 +1500,14 @@ setNewRelfilenode(Relation relation) /* ---------------- * UpdateStats + * + * Update pg_class' relpages and reltuples statistics for the given relation + * (which can be either a table or an index). Note that this is not used + * in the context of VACUUM. * ---------------- */ void -UpdateStats(Oid relid, long reltuples) +UpdateStats(Oid relid, double reltuples) { Relation whichRel; Relation pg_class; @@ -1636,6 +1598,10 @@ UpdateStats(Oid relid, long reltuples) * with zero size statistics until a VACUUM is done. The optimizer * will generate very bad plans if the stats claim the table is empty * when it is actually sizable. See also CREATE TABLE in heap.c. + * + * Note: this path is also taken during bootstrap, because bootstrap.c + * passes reltuples = 0 after loading a table. We have to estimate some + * number for reltuples based on the actual number of pages. */ relpages = RelationGetNumberOfBlocks(whichRel); @@ -1689,15 +1655,15 @@ UpdateStats(Oid relid, long reltuples) for (i = 0; i < Natts_pg_class; i++) { - nulls[i] = heap_attisnull(tuple, i + 1) ? 'n' : ' '; + nulls[i] = ' '; replace[i] = ' '; values[i] = (Datum) NULL; } replace[Anum_pg_class_relpages - 1] = 'r'; - values[Anum_pg_class_relpages - 1] = (Datum) relpages; + values[Anum_pg_class_relpages - 1] = Int32GetDatum(relpages); replace[Anum_pg_class_reltuples - 1] = 'r'; - values[Anum_pg_class_reltuples - 1] = (Datum) reltuples; + values[Anum_pg_class_reltuples - 1] = Float4GetDatum((float4) reltuples); newtup = heap_modifytuple(tuple, pg_class, values, nulls, replace); simple_heap_update(pg_class, &tuple->t_self, newtup); if (!IsIgnoringSystemIndexes()) @@ -1741,7 +1707,7 @@ DefaultBuild(Relation heapRelation, TupleDesc heapDescriptor; Datum datum[INDEX_MAX_KEYS]; char nullv[INDEX_MAX_KEYS]; - long reltuples, + double reltuples, indtuples; Node *predicate = indexInfo->ii_Predicate; @@ -1796,7 +1762,7 @@ DefaultBuild(Relation heapRelation, 0, /* number of keys */ (ScanKey) NULL); /* scan key */ - reltuples = indtuples = 0; + reltuples = indtuples = 0.0; /* * for each tuple in the base relation, we create an index tuple and @@ -1808,7 +1774,7 @@ DefaultBuild(Relation heapRelation, { MemoryContextReset(econtext->ecxt_per_tuple_memory); - reltuples++; + reltuples += 1.0; #ifndef OMIT_PARTIAL_INDEX @@ -1821,7 +1787,7 @@ DefaultBuild(Relation heapRelation, slot->val = heapTuple; if (ExecQual((List *) oldPred, econtext, false)) { - indtuples++; + indtuples += 1.0; continue; } } @@ -1838,7 +1804,7 @@ DefaultBuild(Relation heapRelation, } #endif /* OMIT_PARTIAL_INDEX */ - indtuples++; + indtuples += 1.0; /* * FormIndexDatum fills in its datum and null parameters with diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 88e56869da5..24cc7a8b254 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -8,19 +8,16 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.16 2001/03/22 06:16:11 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.17 2001/05/07 00:43:17 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" -#include <sys/types.h> -#include <sys/file.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <unistd.h> +#include <math.h> #include "access/heapam.h" +#include "access/tuptoaster.h" #include "catalog/catname.h" #include "catalog/indexing.h" #include "catalog/pg_operator.h" @@ -29,43 +26,139 @@ #include "commands/vacuum.h" #include "miscadmin.h" #include "parser/parse_oper.h" -#include "tcop/tcopprot.h" #include "utils/acl.h" #include "utils/builtins.h" +#include "utils/datum.h" #include "utils/fmgroids.h" -#include "utils/inval.h" #include "utils/syscache.h" +#include "utils/tuplesort.h" -#define swapLong(a,b) {long tmp; tmp=a; a=b; b=tmp;} -#define swapInt(a,b) {int tmp; tmp=a; a=b; b=tmp;} -#define swapDatum(a,b) {Datum tmp; tmp=a; a=b; b=tmp;} -#define VacAttrStatsEqValid(stats) ( stats->f_cmpeq.fn_addr != NULL ) -#define VacAttrStatsLtGtValid(stats) ( stats->f_cmplt.fn_addr != NULL && \ - stats->f_cmpgt.fn_addr != NULL && \ - RegProcedureIsValid(stats->outfunc) ) +/* + * Analysis algorithms supported + */ +typedef enum { + ALG_MINIMAL = 1, /* Compute only most-common-values */ + ALG_SCALAR /* Compute MCV, histogram, sort correlation */ +} AlgCode; + +/* + * To avoid consuming too much memory during analysis and/or too much space + * in the resulting pg_statistic rows, we ignore varlena datums that are wider + * than WIDTH_THRESHOLD (after detoasting!). This is legitimate for MCV + * and distinct-value calculations since a wide value is unlikely to be + * duplicated at all, much less be a most-common value. For the same reason, + * ignoring wide values will not affect our estimates of histogram bin + * boundaries very much. + */ +#define WIDTH_THRESHOLD 256 + +/* + * We build one of these structs for each attribute (column) that is to be + * analyzed. The struct and subsidiary data are in TransactionCommandContext, + * so they live until the end of the ANALYZE operation. + */ +typedef struct +{ + /* These fields are set up by examine_attribute */ + int attnum; /* attribute number */ + AlgCode algcode; /* Which algorithm to use for this column */ + int minrows; /* Minimum # of rows needed for stats */ + Form_pg_attribute attr; /* copy of pg_attribute row for column */ + Form_pg_type attrtype; /* copy of pg_type row for column */ + Oid eqopr; /* '=' operator for datatype, if any */ + Oid eqfunc; /* and associated function */ + Oid ltopr; /* '<' operator for datatype, if any */ + + /* These fields are filled in by the actual statistics-gathering routine */ + bool stats_valid; + float4 stanullfrac; /* fraction of entries that are NULL */ + int4 stawidth; /* average width */ + float4 stadistinct; /* # distinct values */ + int2 stakind[STATISTIC_NUM_SLOTS]; + Oid staop[STATISTIC_NUM_SLOTS]; + int numnumbers[STATISTIC_NUM_SLOTS]; + float4 *stanumbers[STATISTIC_NUM_SLOTS]; + int numvalues[STATISTIC_NUM_SLOTS]; + Datum *stavalues[STATISTIC_NUM_SLOTS]; +} VacAttrStats; + + +typedef struct +{ + Datum value; /* a data value */ + int tupno; /* position index for tuple it came from */ +} ScalarItem; + +typedef struct +{ + int count; /* # of duplicates */ + int first; /* values[] index of first occurrence */ +} ScalarMCVItem; + + +#define swapInt(a,b) {int _tmp; _tmp=a; a=b; b=_tmp;} +#define swapDatum(a,b) {Datum _tmp; _tmp=a; a=b; b=_tmp;} -static void attr_stats(Relation onerel, int attr_cnt, VacAttrStats *vacattrstats, HeapTuple tuple); -static void bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len); -static void update_attstats(Oid relid, int natts, VacAttrStats *vacattrstats); -static void del_stats(Oid relid, int attcnt, int *attnums); + +static int MESSAGE_LEVEL; + +/* context information for compare_scalars() */ +static FmgrInfo *datumCmpFn; +static SortFunctionKind datumCmpFnKind; +static int *datumCmpTupnoLink; + + +static VacAttrStats *examine_attribute(Relation onerel, int attnum); +static int acquire_sample_rows(Relation onerel, HeapTuple *rows, + int targrows, long *totalrows); +static double random_fract(void); +static double init_selection_state(int n); +static long select_next_random_record(long t, int n, double *stateptr); +static int compare_rows(const void *a, const void *b); +static int compare_scalars(const void *a, const void *b); +static int compare_mcvs(const void *a, const void *b); +static OffsetNumber get_page_max_offset(Relation relation, + BlockNumber blocknumber); +static void compute_minimal_stats(VacAttrStats *stats, + TupleDesc tupDesc, long totalrows, + HeapTuple *rows, int numrows); +static void compute_scalar_stats(VacAttrStats *stats, + TupleDesc tupDesc, long totalrows, + HeapTuple *rows, int numrows); +static void update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats); /* - * analyze_rel() -- analyze relation + * analyze_rel() -- analyze one relation */ void -analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL) +analyze_rel(Oid relid, VacuumStmt *vacstmt) { - HeapTuple tuple; Relation onerel; - int32 i; - int attr_cnt, - *attnums = NULL; Form_pg_attribute *attr; - VacAttrStats *vacattrstats; - HeapScanDesc scan; + int attr_cnt, + tcnt, + i; + VacAttrStats **vacattrstats; + int targrows, + numrows; + long totalrows; + HeapTuple *rows; + HeapTuple tuple; + + if (vacstmt->verbose) + MESSAGE_LEVEL = NOTICE; + else + MESSAGE_LEVEL = DEBUG; + /* + * Begin a transaction for analyzing this relation. + * + * Note: All memory allocated during ANALYZE will live in + * TransactionCommandContext or a subcontext thereof, so it will + * all be released by transaction commit at the end of this routine. + */ StartTransactionCommand(); /* @@ -76,7 +169,7 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL) /* * Race condition -- if the pg_class tuple has gone away since the - * last time we saw it, we don't need to vacuum it. + * last time we saw it, we don't need to process it. */ tuple = SearchSysCache(RELOID, ObjectIdGetDatum(relid), @@ -88,8 +181,7 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL) } /* - * We can VACUUM ANALYZE any table except pg_statistic. see - * update_relstats + * We can ANALYZE any table except pg_statistic. See update_attstats */ if (strcmp(NameStr(((Form_pg_class) GETSTRUCT(tuple))->relname), StatisticRelationName) == 0) @@ -100,586 +192,1466 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL) } ReleaseSysCache(tuple); + /* + * Open the class, getting only a read lock on it, and check permissions + */ onerel = heap_open(relid, AccessShareLock); if (!pg_ownercheck(GetUserId(), RelationGetRelationName(onerel), RELNAME)) { - - /* - * we already did an elog during vacuum elog(NOTICE, "Skipping - * \"%s\" --- only table owner can VACUUM it", - * RelationGetRelationName(onerel)); - */ + /* No need for a notice if we already complained during VACUUM */ + if (!vacstmt->vacuum) + elog(NOTICE, "Skipping \"%s\" --- only table owner can ANALYZE it", + RelationGetRelationName(onerel)); heap_close(onerel, NoLock); CommitTransactionCommand(); return; } - elog(MESSAGE_LEVEL, "Analyzing..."); + elog(MESSAGE_LEVEL, "Analyzing %s", RelationGetRelationName(onerel)); - attr_cnt = onerel->rd_att->natts; + /* + * Determine which columns to analyze + * + * Note that system attributes are never analyzed. + */ attr = onerel->rd_att->attrs; + attr_cnt = onerel->rd_att->natts; - if (anal_cols2 != NIL) + if (vacstmt->va_cols != NIL) { - int tcnt = 0; List *le; - if (length(anal_cols2) > attr_cnt) - elog(ERROR, "vacuum: too many attributes specified for relation %s", - RelationGetRelationName(onerel)); - attnums = (int *) palloc(attr_cnt * sizeof(int)); - foreach(le, anal_cols2) + vacattrstats = (VacAttrStats **) palloc(length(vacstmt->va_cols) * + sizeof(VacAttrStats *)); + tcnt = 0; + foreach(le, vacstmt->va_cols) { - char *col = (char *) lfirst(le); + char *col = strVal(lfirst(le)); for (i = 0; i < attr_cnt; i++) { if (namestrcmp(&(attr[i]->attname), col) == 0) break; } - if (i < attr_cnt) /* found */ - attnums[tcnt++] = i; - else - { - elog(ERROR, "vacuum: there is no attribute %s in %s", + if (i >= attr_cnt) + elog(ERROR, "ANALYZE: there is no attribute %s in %s", col, RelationGetRelationName(onerel)); - } + vacattrstats[tcnt] = examine_attribute(onerel, i+1); + if (vacattrstats[tcnt] != NULL) + tcnt++; + } + attr_cnt = tcnt; + } + else + { + vacattrstats = (VacAttrStats **) palloc(attr_cnt * + sizeof(VacAttrStats *)); + tcnt = 0; + for (i = 0; i < attr_cnt; i++) + { + vacattrstats[tcnt] = examine_attribute(onerel, i+1); + if (vacattrstats[tcnt] != NULL) + tcnt++; } attr_cnt = tcnt; } - vacattrstats = (VacAttrStats *) palloc(attr_cnt * sizeof(VacAttrStats)); + /* + * Quit if no analyzable columns + */ + if (attr_cnt <= 0) + { + heap_close(onerel, NoLock); + CommitTransactionCommand(); + return; + } + /* + * Determine how many rows we need to sample, using the worst case + * from all analyzable columns. We use a lower bound of 100 rows + * to avoid possible overflow in Vitter's algorithm. + */ + targrows = 100; for (i = 0; i < attr_cnt; i++) { - Operator func_operator; - VacAttrStats *stats; - - stats = &vacattrstats[i]; - stats->attr = palloc(ATTRIBUTE_TUPLE_SIZE); - memcpy(stats->attr, attr[((attnums) ? attnums[i] : i)], - ATTRIBUTE_TUPLE_SIZE); - stats->best = stats->guess1 = stats->guess2 = 0; - stats->max = stats->min = 0; - stats->best_len = stats->guess1_len = stats->guess2_len = 0; - stats->max_len = stats->min_len = 0; - stats->initialized = false; - stats->best_cnt = stats->guess1_cnt = stats->guess1_hits = stats->guess2_hits = 0; - stats->max_cnt = stats->min_cnt = stats->null_cnt = stats->nonnull_cnt = 0; - - func_operator = compatible_oper("=", - stats->attr->atttypid, - stats->attr->atttypid, - true); - if (func_operator != NULL) - { - fmgr_info(oprfuncid(func_operator), &(stats->f_cmpeq)); - ReleaseSysCache(func_operator); - } - else - stats->f_cmpeq.fn_addr = NULL; + if (targrows < vacattrstats[i]->minrows) + targrows = vacattrstats[i]->minrows; + } + + /* + * Acquire the sample rows + */ + rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple)); + numrows = acquire_sample_rows(onerel, rows, targrows, &totalrows); - func_operator = compatible_oper("<", - stats->attr->atttypid, - stats->attr->atttypid, - true); - if (func_operator != NULL) + /* + * If we are running a standalone ANALYZE, update pages/tuples stats + * in pg_class. We have the accurate page count from heap_beginscan, + * but only an approximate number of tuples; therefore, if we are + * part of VACUUM ANALYZE do *not* overwrite the accurate count already + * inserted by VACUUM. + */ + if (!vacstmt->vacuum) + vac_update_relstats(RelationGetRelid(onerel), + onerel->rd_nblocks, + (double) totalrows, + RelationGetForm(onerel)->relhasindex); + + /* + * Compute the statistics. Temporary results during the calculations + * for each column are stored in a child context. The calc routines + * are responsible to make sure that whatever they store into the + * VacAttrStats structure is allocated in TransactionCommandContext. + */ + if (numrows > 0) + { + MemoryContext col_context, + old_context; + + col_context = AllocSetContextCreate(CurrentMemoryContext, + "Analyze Column", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + old_context = MemoryContextSwitchTo(col_context); + for (i = 0; i < attr_cnt; i++) { - fmgr_info(oprfuncid(func_operator), &(stats->f_cmplt)); - stats->op_cmplt = oprid(func_operator); - ReleaseSysCache(func_operator); + switch (vacattrstats[i]->algcode) + { + case ALG_MINIMAL: + compute_minimal_stats(vacattrstats[i], + onerel->rd_att, totalrows, + rows, numrows); + break; + case ALG_SCALAR: + compute_scalar_stats(vacattrstats[i], + onerel->rd_att, totalrows, + rows, numrows); + break; + } + MemoryContextResetAndDeleteChildren(col_context); } - else + MemoryContextSwitchTo(old_context); + MemoryContextDelete(col_context); + + /* + * Emit the completed stats rows into pg_statistic, replacing any + * previous statistics for the target columns. (If there are stats + * in pg_statistic for columns we didn't process, we leave them alone.) + */ + update_attstats(relid, attr_cnt, vacattrstats); + } + + /* + * Close source relation now, but keep lock so that no one deletes it + * before we commit. (If someone did, they'd fail to clean up the + * entries we made in pg_statistic.) + */ + heap_close(onerel, NoLock); + + /* Commit and release working memory */ + CommitTransactionCommand(); +} + +/* + * examine_attribute -- pre-analysis of a single column + * + * Determine whether the column is analyzable; if so, create and initialize + * a VacAttrStats struct for it. If not, return NULL. + */ +static VacAttrStats * +examine_attribute(Relation onerel, int attnum) +{ + Form_pg_attribute attr = onerel->rd_att->attrs[attnum-1]; + Operator func_operator; + Oid oprrest; + HeapTuple typtuple; + Oid eqopr = InvalidOid; + Oid eqfunc = InvalidOid; + Oid ltopr = InvalidOid; + VacAttrStats *stats; + + /* Don't analyze column if user has specified not to */ + if (attr->attstattarget <= 0) + return NULL; + + /* If column has no "=" operator, we can't do much of anything */ + func_operator = compatible_oper("=", + attr->atttypid, + attr->atttypid, + true); + if (func_operator != NULL) + { + oprrest = ((Form_pg_operator) GETSTRUCT(func_operator))->oprrest; + if (oprrest == F_EQSEL) { - stats->f_cmplt.fn_addr = NULL; - stats->op_cmplt = InvalidOid; + eqopr = oprid(func_operator); + eqfunc = oprfuncid(func_operator); } + ReleaseSysCache(func_operator); + } + if (!OidIsValid(eqfunc)) + return NULL; - func_operator = compatible_oper(">", - stats->attr->atttypid, - stats->attr->atttypid, - true); - if (func_operator != NULL) + /* + * If we have "=" then we're at least able to do the minimal algorithm, + * so start filling in a VacAttrStats struct. + */ + stats = (VacAttrStats *) palloc(sizeof(VacAttrStats)); + MemSet(stats, 0, sizeof(VacAttrStats)); + stats->attnum = attnum; + stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_TUPLE_SIZE); + memcpy(stats->attr, attr, ATTRIBUTE_TUPLE_SIZE); + typtuple = SearchSysCache(TYPEOID, + ObjectIdGetDatum(attr->atttypid), + 0, 0, 0); + if (!HeapTupleIsValid(typtuple)) + elog(ERROR, "cache lookup of type %u failed", attr->atttypid); + stats->attrtype = (Form_pg_type) palloc(sizeof(FormData_pg_type)); + memcpy(stats->attrtype, GETSTRUCT(typtuple), sizeof(FormData_pg_type)); + ReleaseSysCache(typtuple); + stats->eqopr = eqopr; + stats->eqfunc = eqfunc; + + /* Is there a "<" operator with suitable semantics? */ + func_operator = compatible_oper("<", + attr->atttypid, + attr->atttypid, + true); + if (func_operator != NULL) + { + oprrest = ((Form_pg_operator) GETSTRUCT(func_operator))->oprrest; + if (oprrest == F_SCALARLTSEL) { - fmgr_info(oprfuncid(func_operator), &(stats->f_cmpgt)); - ReleaseSysCache(func_operator); + ltopr = oprid(func_operator); } - else - stats->f_cmpgt.fn_addr = NULL; + ReleaseSysCache(func_operator); + } + stats->ltopr = ltopr; + + /* + * Determine the algorithm to use (this will get more complicated later) + */ + if (OidIsValid(ltopr)) + { + /* Seems to be a scalar datatype */ + stats->algcode = ALG_SCALAR; + /*-------------------- + * The following choice of minrows is based on the paper + * "Random sampling for histogram construction: how much is enough?" + * by Surajit Chaudhuri, Rajeev Motwani and Vivek Narasayya, in + * Proceedings of ACM SIGMOD International Conference on Management + * of Data, 1998, Pages 436-447. Their Corollary 1 to Theorem 5 + * says that for table size n, histogram size k, maximum relative + * error in bin size f, and error probability gamma, the minimum + * random sample size is + * r = 4 * k * ln(2*n/gamma) / f^2 + * Taking f = 0.5, gamma = 0.01, n = 1 million rows, we obtain + * r = 305.82 * k + * Note that because of the log function, the dependence on n is + * quite weak; even at n = 1 billion, a 300*k sample gives <= 0.59 + * bin size error with probability 0.99. So there's no real need to + * scale for n, which is a good thing because we don't necessarily + * know it at this point. + *-------------------- + */ + stats->minrows = 300 * attr->attstattarget; + } + else + { + /* Can't do much but the minimal stuff */ + stats->algcode = ALG_MINIMAL; + /* Might as well use the same minrows as above */ + stats->minrows = 300 * attr->attstattarget; + } + + return stats; +} - tuple = SearchSysCache(TYPEOID, - ObjectIdGetDatum(stats->attr->atttypid), - 0, 0, 0); - if (HeapTupleIsValid(tuple)) +/* + * acquire_sample_rows -- acquire a random sample of rows from the table + * + * Up to targrows rows are collected (if there are fewer than that many + * rows in the table, all rows are collected). When the table is larger + * than targrows, a truly random sample is collected: every row has an + * equal chance of ending up in the final sample. + * + * We also estimate the total number of rows in the table, and return that + * into *totalrows. + * + * The returned list of tuples is in order by physical position in the table. + * (We will rely on this later to derive correlation estimates.) + */ +static int +acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows, + long *totalrows) +{ + int numrows = 0; + HeapScanDesc scan; + HeapTuple tuple; + ItemPointer lasttuple; + BlockNumber lastblock, + estblock; + OffsetNumber lastoffset; + int numest; + double tuplesperpage; + long t; + double rstate; + + Assert(targrows > 1); + /* + * Do a simple linear scan until we reach the target number of rows. + */ + scan = heap_beginscan(onerel, false, SnapshotNow, 0, NULL); + while (HeapTupleIsValid(tuple = heap_getnext(scan, 0))) + { + rows[numrows++] = heap_copytuple(tuple); + if (numrows >= targrows) + break; + } + heap_endscan(scan); + /* + * If we ran out of tuples then we're done, no matter how few we + * collected. No sort is needed, since they're already in order. + */ + if (!HeapTupleIsValid(tuple)) + { + *totalrows = numrows; + return numrows; + } + /* + * Otherwise, start replacing tuples in the sample until we reach the + * end of the relation. This algorithm is from Jeff Vitter's paper + * (see full citation below). It works by repeatedly computing the number + * of the next tuple we want to fetch, which will replace a randomly + * chosen element of the reservoir (current set of tuples). At all times + * the reservoir is a true random sample of the tuples we've passed over + * so far, so when we fall off the end of the relation we're done. + * + * A slight difficulty is that since we don't want to fetch tuples or even + * pages that we skip over, it's not possible to fetch *exactly* the N'th + * tuple at each step --- we don't know how many valid tuples are on + * the skipped pages. We handle this by assuming that the average number + * of valid tuples/page on the pages already scanned over holds good for + * the rest of the relation as well; this lets us estimate which page + * the next tuple should be on and its position in the page. Then we + * fetch the first valid tuple at or after that position, being careful + * not to use the same tuple twice. This approach should still give a + * good random sample, although it's not perfect. + */ + lasttuple = &(rows[numrows-1]->t_self); + lastblock = ItemPointerGetBlockNumber(lasttuple); + lastoffset = ItemPointerGetOffsetNumber(lasttuple); + /* + * If possible, estimate tuples/page using only completely-scanned pages. + */ + for (numest = numrows; numest > 0; numest--) + { + if (ItemPointerGetBlockNumber(&(rows[numest-1]->t_self)) != lastblock) + break; + } + if (numest == 0) + { + numest = numrows; /* don't have a full page? */ + estblock = lastblock + 1; + } + else + { + estblock = lastblock; + } + tuplesperpage = (double) numest / (double) estblock; + + t = numrows; /* t is the # of records processed so far */ + rstate = init_selection_state(targrows); + for (;;) + { + double targpos; + BlockNumber targblock; + OffsetNumber targoffset, + maxoffset; + + t = select_next_random_record(t, targrows, &rstate); + /* Try to read the t'th record in the table */ + targpos = (double) t / tuplesperpage; + targblock = (BlockNumber) targpos; + targoffset = ((int) (targpos - targblock) * tuplesperpage) + + FirstOffsetNumber; + /* Make sure we are past the last selected record */ + if (targblock <= lastblock) { - stats->outfunc = ((Form_pg_type) GETSTRUCT(tuple))->typoutput; - stats->typelem = ((Form_pg_type) GETSTRUCT(tuple))->typelem; - ReleaseSysCache(tuple); + targblock = lastblock; + if (targoffset <= lastoffset) + targoffset = lastoffset + 1; } - else + /* Loop to find first valid record at or after given position */ + pageloop:; + /* + * Have we fallen off the end of the relation? (We rely on + * heap_beginscan to have updated rd_nblocks.) + */ + if (targblock >= onerel->rd_nblocks) + break; + maxoffset = get_page_max_offset(onerel, targblock); + for (;;) { - stats->outfunc = InvalidOid; - stats->typelem = InvalidOid; + HeapTupleData targtuple; + Buffer targbuffer; + + if (targoffset > maxoffset) + { + /* Fell off end of this page, try next */ + targblock++; + targoffset = FirstOffsetNumber; + goto pageloop; + } + ItemPointerSet(&targtuple.t_self, targblock, targoffset); + heap_fetch(onerel, SnapshotNow, &targtuple, &targbuffer); + if (targtuple.t_data != NULL) + { + /* + * Found a suitable tuple, so save it, replacing one old + * tuple at random + */ + int k = (int) (targrows * random_fract()); + + Assert(k >= 0 && k < targrows); + heap_freetuple(rows[k]); + rows[k] = heap_copytuple(&targtuple); + ReleaseBuffer(targbuffer); + lastblock = targblock; + lastoffset = targoffset; + break; + } + /* this tuple is dead, so advance to next one on same page */ + targoffset++; } } - /* delete existing pg_statistic rows for relation */ - del_stats(relid, ((attnums) ? attr_cnt : 0), attnums); - - /* scan relation to gather statistics */ - scan = heap_beginscan(onerel, false, SnapshotNow, 0, NULL); - while (HeapTupleIsValid(tuple = heap_getnext(scan, 0))) - attr_stats(onerel, attr_cnt, vacattrstats, tuple); + /* + * Now we need to sort the collected tuples by position (itempointer). + */ + qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows); - heap_endscan(scan); + /* + * Estimate total number of valid rows in relation. + */ + *totalrows = (long) (onerel->rd_nblocks * tuplesperpage + 0.5); - /* close rel, but keep lock so it doesn't go away before commit */ - heap_close(onerel, NoLock); + return numrows; +} - /* update statistics in pg_class */ - update_attstats(relid, attr_cnt, vacattrstats); +/* Select a random value R uniformly distributed in 0 < R < 1 */ +static double +random_fract(void) +{ + long z; - CommitTransactionCommand(); + /* random() can produce endpoint values, try again if so */ + do + { + z = random(); + } while (! (z > 0 && z < MAX_RANDOM_VALUE)); + return (double) z / (double) MAX_RANDOM_VALUE; } /* - * attr_stats() -- compute column statistics used by the planner + * These two routines embody Algorithm Z from "Random sampling with a + * reservoir" by Jeffrey S. Vitter, in ACM Trans. Math. Softw. 11, 1 + * (Mar. 1985), Pages 37-57. While Vitter describes his algorithm in terms + * of the count S of records to skip before processing another record, + * it is convenient to work primarily with t, the index (counting from 1) + * of the last record processed and next record to process. The only extra + * state needed between calls is W, a random state variable. * - * We compute the column min, max, null and non-null counts. - * Plus we attempt to find the count of the value that occurs most - * frequently in each column. These figures are used to compute - * the selectivity of the column. + * init_selection_state computes the initial W value. * - * We use a three-bucket cache to get the most frequent item. - * The 'guess' buckets count hits. A cache miss causes guess1 - * to get the most hit 'guess' item in the most recent cycle, and - * the new item goes into guess2. Whenever the total count of hits - * of a 'guess' entry is larger than 'best', 'guess' becomes 'best'. + * Given that we've already processed t records (t >= n), + * select_next_random_record determines the number of the next record to + * process. + */ +static double +init_selection_state(int n) +{ + /* Initial value of W (for use when Algorithm Z is first applied) */ + return exp(- log(random_fract())/n); +} + +static long +select_next_random_record(long t, int n, double *stateptr) +{ + /* The magic constant here is T from Vitter's paper */ + if (t <= (22 * n)) + { + /* Process records using Algorithm X until t is large enough */ + double V, + quot; + + V = random_fract(); /* Generate V */ + t++; + quot = (double) (t - n) / (double) t; + /* Find min S satisfying (4.1) */ + while (quot > V) + { + t++; + quot *= (double) (t - n) / (double) t; + } + } + else + { + /* Now apply Algorithm Z */ + double W = *stateptr; + long term = t - n + 1; + int S; + + for (;;) + { + long numer, + numer_lim, + denom; + double U, + X, + lhs, + rhs, + y, + tmp; + + /* Generate U and X */ + U = random_fract(); + X = t * (W - 1.0); + S = X; /* S is tentatively set to floor(X) */ + /* Test if U <= h(S)/cg(X) in the manner of (6.3) */ + tmp = (double) (t + 1) / (double) term; + lhs = exp(log(((U * tmp * tmp) * (term + S))/(t + X))/n); + rhs = (((t + X)/(term + S)) * term)/t; + if (lhs <= rhs) + { + W = rhs/lhs; + break; + } + /* Test if U <= f(S)/cg(X) */ + y = (((U * (t + 1))/term) * (t + S + 1))/(t + X); + if (n < S) + { + denom = t; + numer_lim = term + S; + } + else + { + denom = t - n + S; + numer_lim = t + 1; + } + for (numer = t + S; numer >= numer_lim; numer--) + { + y *= (double) numer / (double) denom; + denom--; + } + W = exp(- log(random_fract())/n); /* Generate W in advance */ + if (exp(log(y)/n) <= (t + X)/t) + break; + } + t += S + 1; + *stateptr = W; + } + return t; +} + +/* + * qsort comparator for sorting rows[] array + */ +static int +compare_rows(const void *a, const void *b) +{ + HeapTuple ha = * (HeapTuple *) a; + HeapTuple hb = * (HeapTuple *) b; + BlockNumber ba = ItemPointerGetBlockNumber(&ha->t_self); + OffsetNumber oa = ItemPointerGetOffsetNumber(&ha->t_self); + BlockNumber bb = ItemPointerGetBlockNumber(&hb->t_self); + OffsetNumber ob = ItemPointerGetOffsetNumber(&hb->t_self); + + if (ba < bb) + return -1; + if (ba > bb) + return 1; + if (oa < ob) + return -1; + if (oa > ob) + return 1; + return 0; +} + +/* + * Discover the largest valid tuple offset number on the given page + * + * This code probably ought to live in some other module. + */ +static OffsetNumber +get_page_max_offset(Relation relation, BlockNumber blocknumber) +{ + Buffer buffer; + Page p; + OffsetNumber offnum; + + buffer = ReadBuffer(relation, blocknumber); + if (!BufferIsValid(buffer)) + elog(ERROR, "get_page_max_offset: %s relation: ReadBuffer(%ld) failed", + RelationGetRelationName(relation), (long) blocknumber); + LockBuffer(buffer, BUFFER_LOCK_SHARE); + p = BufferGetPage(buffer); + offnum = PageGetMaxOffsetNumber(p); + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + ReleaseBuffer(buffer); + return offnum; +} + + +/* + * compute_minimal_stats() -- compute minimal column statistics * - * This method works perfectly for columns with unique values, and columns - * with only two unique values, plus nulls. + * We use this when we can find only an "=" operator for the datatype. * - * It becomes less perfect as the number of unique values increases and - * their distribution in the table becomes more random. + * We determine the fraction of non-null rows, the average width, the + * most common values, and the (estimated) number of distinct values. * + * The most common values are determined by brute force: we keep a list + * of previously seen values, ordered by number of times seen, as we scan + * the samples. A newly seen value is inserted just after the last + * multiply-seen value, causing the bottommost (oldest) singly-seen value + * to drop off the list. The accuracy of this method, and also its cost, + * depend mainly on the length of the list we are willing to keep. */ static void -attr_stats(Relation onerel, int attr_cnt, VacAttrStats *vacattrstats, HeapTuple tuple) +compute_minimal_stats(VacAttrStats *stats, + TupleDesc tupDesc, long totalrows, + HeapTuple *rows, int numrows) { int i; - TupleDesc tupDesc = onerel->rd_att; - - for (i = 0; i < attr_cnt; i++) + int null_cnt = 0; + int nonnull_cnt = 0; + int toowide_cnt = 0; + double total_width = 0; + bool is_varlena = (!stats->attr->attbyval && + stats->attr->attlen == -1); + FmgrInfo f_cmpeq; + typedef struct + { + Datum value; + int count; + } TrackItem; + TrackItem *track; + int track_cnt, + track_max; + int num_mcv = stats->attr->attstattarget; + + /* We track up to 2*n values for an n-element MCV list; but at least 10 */ + track_max = 2 * num_mcv; + if (track_max < 10) + track_max = 10; + track = (TrackItem *) palloc(track_max * sizeof(TrackItem)); + track_cnt = 0; + + fmgr_info(stats->eqfunc, &f_cmpeq); + + for (i = 0; i < numrows; i++) { - VacAttrStats *stats = &vacattrstats[i]; - Datum origvalue; + HeapTuple tuple = rows[i]; Datum value; bool isnull; - bool value_hit; - - if (!VacAttrStatsEqValid(stats)) - continue; - -#ifdef _DROP_COLUMN_HACK__ - if (COLUMN_IS_DROPPED(stats->attr)) - continue; -#endif /* _DROP_COLUMN_HACK__ */ + bool match; + int firstcount1, + j; - origvalue = heap_getattr(tuple, stats->attr->attnum, - tupDesc, &isnull); + value = heap_getattr(tuple, stats->attnum, tupDesc, &isnull); + /* Check for null/nonnull */ if (isnull) { - stats->null_cnt++; + null_cnt++; continue; } - stats->nonnull_cnt++; + nonnull_cnt++; /* - * If the value is toasted, detoast it to avoid repeated - * detoastings and resultant memory leakage inside the comparison - * routines. + * If it's a varlena field, add up widths for average width + * calculation. Note that if the value is toasted, we + * use the toasted width. We don't bother with this calculation + * if it's a fixed-width type. */ - if (!stats->attr->attbyval && stats->attr->attlen == -1) - value = PointerGetDatum(PG_DETOAST_DATUM(origvalue)); - else - value = origvalue; - - if (!stats->initialized) + if (is_varlena) { - bucketcpy(stats->attr, value, &stats->best, &stats->best_len); - /* best_cnt gets incremented below */ - bucketcpy(stats->attr, value, &stats->guess1, &stats->guess1_len); - stats->guess1_cnt = stats->guess1_hits = 1; - bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len); - stats->guess2_hits = 1; - if (VacAttrStatsLtGtValid(stats)) + total_width += VARSIZE(DatumGetPointer(value)); + /* + * If the value is toasted, we want to detoast it just once to + * avoid repeated detoastings and resultant excess memory usage + * during the comparisons. Also, check to see if the value is + * excessively wide, and if so don't detoast at all --- just + * ignore the value. + */ + if (toast_raw_datum_size(value) > WIDTH_THRESHOLD) { - bucketcpy(stats->attr, value, &stats->max, &stats->max_len); - bucketcpy(stats->attr, value, &stats->min, &stats->min_len); - /* min_cnt, max_cnt get incremented below */ + toowide_cnt++; + continue; } - stats->initialized = true; + value = PointerGetDatum(PG_DETOAST_DATUM(value)); } - if (VacAttrStatsLtGtValid(stats)) + /* + * See if the value matches anything we're already tracking. + */ + match = false; + firstcount1 = track_cnt; + for (j = 0; j < track_cnt; j++) { - if (DatumGetBool(FunctionCall2(&stats->f_cmplt, - value, stats->min))) + if (DatumGetBool(FunctionCall2(&f_cmpeq, value, track[j].value))) { - bucketcpy(stats->attr, value, &stats->min, &stats->min_len); - stats->min_cnt = 1; + match = true; + break; } - else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq, - value, stats->min))) - stats->min_cnt++; + if (j < firstcount1 && track[j].count == 1) + firstcount1 = j; + } - if (DatumGetBool(FunctionCall2(&stats->f_cmpgt, - value, stats->max))) + if (match) + { + /* Found a match */ + track[j].count++; + /* This value may now need to "bubble up" in the track list */ + while (j > 0 && track[j].count > track[j-1].count) { - bucketcpy(stats->attr, value, &stats->max, &stats->max_len); - stats->max_cnt = 1; + swapDatum(track[j].value, track[j-1].value); + swapInt(track[j].count, track[j-1].count); + j--; } - else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq, - value, stats->max))) - stats->max_cnt++; } - - value_hit = true; - if (DatumGetBool(FunctionCall2(&stats->f_cmpeq, - value, stats->best))) - stats->best_cnt++; - else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq, - value, stats->guess1))) + else { - stats->guess1_cnt++; - stats->guess1_hits++; + /* No match. Insert at head of count-1 list */ + if (track_cnt < track_max) + track_cnt++; + for (j = track_cnt-1; j > firstcount1; j--) + { + track[j].value = track[j-1].value; + track[j].count = track[j-1].count; + } + if (firstcount1 < track_cnt) + { + track[firstcount1].value = value; + track[firstcount1].count = 1; + } } - else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq, - value, stats->guess2))) - stats->guess2_hits++; + } + + /* We can only compute valid stats if we found some non-null values. */ + if (nonnull_cnt > 0) + { + int nmultiple, + summultiple; + + stats->stats_valid = true; + /* Do the simple null-frac and width stats */ + stats->stanullfrac = (double) null_cnt / (double) numrows; + if (is_varlena) + stats->stawidth = total_width / (double) nonnull_cnt; else - value_hit = false; + stats->stawidth = stats->attrtype->typlen; - if (stats->guess2_hits > stats->guess1_hits) + /* Count the number of values we found multiple times */ + summultiple = 0; + for (nmultiple = 0; nmultiple < track_cnt; nmultiple++) { - swapDatum(stats->guess1, stats->guess2); - swapInt(stats->guess1_len, stats->guess2_len); - swapLong(stats->guess1_hits, stats->guess2_hits); - stats->guess1_cnt = stats->guess1_hits; + if (track[nmultiple].count == 1) + break; + summultiple += track[nmultiple].count; } - if (stats->guess1_cnt > stats->best_cnt) + + if (nmultiple == 0) { - swapDatum(stats->best, stats->guess1); - swapInt(stats->best_len, stats->guess1_len); - swapLong(stats->best_cnt, stats->guess1_cnt); - stats->guess1_hits = 1; - stats->guess2_hits = 1; + /* If we found no repeated values, assume it's a unique column */ + stats->stadistinct = -1.0; } - if (!value_hit) + else if (track_cnt < track_max && toowide_cnt == 0 && + nmultiple == track_cnt) { - bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len); - stats->guess1_hits = 1; - stats->guess2_hits = 1; + /* + * Our track list includes every value in the sample, and every + * value appeared more than once. Assume the column has just + * these values. + */ + stats->stadistinct = track_cnt; } + else + { + /*---------- + * Estimate the number of distinct values using the estimator + * proposed by Chaudhuri et al (see citation above). This is + * sqrt(n/r) * max(f1,1) + f2 + f3 + ... + * where fk is the number of distinct values that occurred + * exactly k times in our sample of r rows (from a total of n). + * We assume (not very reliably!) that all the multiply-occurring + * values are reflected in the final track[] list, and the other + * nonnull values all appeared but once. + *---------- + */ + int f1 = nonnull_cnt - summultiple; + double term1; - /* Clean up detoasted copy, if any */ - if (value != origvalue) - pfree(DatumGetPointer(value)); - } -} + if (f1 < 1) + f1 = 1; + term1 = sqrt((double) totalrows / (double) numrows) * f1; + stats->stadistinct = floor(term1 + nmultiple + 0.5); + } -/* - * bucketcpy() -- copy a new value into one of the statistics buckets - */ -static void -bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len) -{ - if (attr->attbyval) - *bucket = value; - else - { - int len = (attr->attlen != -1 ? attr->attlen : VARSIZE(value)); + /* + * If we estimated the number of distinct values at more than 10% + * of the total row count (a very arbitrary limit), then assume + * that stadistinct should scale with the row count rather than be + * a fixed value. + */ + if (stats->stadistinct > 0.1 * totalrows) + stats->stadistinct = - (stats->stadistinct / totalrows); - /* Avoid unnecessary palloc() traffic... */ - if (len > *bucket_len) + /* Generate an MCV slot entry, only if we found multiples */ + if (nmultiple < num_mcv) + num_mcv = nmultiple; + if (num_mcv > 0) { - if (*bucket_len != 0) - pfree(DatumGetPointer(*bucket)); - *bucket = PointerGetDatum(palloc(len)); - *bucket_len = len; + MemoryContext old_context; + Datum *mcv_values; + float4 *mcv_freqs; + + /* Must copy the target values into TransactionCommandContext */ + old_context = MemoryContextSwitchTo(TransactionCommandContext); + mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum)); + mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4)); + for (i = 0; i < num_mcv; i++) + { + mcv_values[i] = datumCopy(track[i].value, + stats->attr->attbyval, + stats->attr->attlen); + mcv_freqs[i] = (double) track[i].count / (double) numrows; + } + MemoryContextSwitchTo(old_context); + + stats->stakind[0] = STATISTIC_KIND_MCV; + stats->staop[0] = stats->eqopr; + stats->stanumbers[0] = mcv_freqs; + stats->numnumbers[0] = num_mcv; + stats->stavalues[0] = mcv_values; + stats->numvalues[0] = num_mcv; } - memcpy(DatumGetPointer(*bucket), DatumGetPointer(value), len); } + + /* We don't need to bother cleaning up any of our temporary palloc's */ } /* - * update_attstats() -- update attribute statistics for one relation + * compute_scalar_stats() -- compute column statistics * - * Statistics are stored in several places: the pg_class row for the - * relation has stats about the whole relation, the pg_attribute rows - * for each attribute store "dispersion", and there is a pg_statistic - * row for each (non-system) attribute. (Dispersion probably ought to - * be moved to pg_statistic, but it's not worth doing unless there's - * another reason to have to change pg_attribute.) The pg_class values - * are updated by VACUUM, not here. - * - * We violate no-overwrite semantics here by storing new values for - * the dispersion column directly into the pg_attribute tuple that's - * already on the page. The reason for this is that if we updated - * these tuples in the usual way, vacuuming pg_attribute itself - * wouldn't work very well --- by the time we got done with a vacuum - * cycle, most of the tuples in pg_attribute would've been obsoleted. - * Updating pg_attribute's own statistics would be especially tricky. - * Of course, this only works for fixed-size never-null columns, but - * dispersion is. + * We use this when we can find "=" and "<" operators for the datatype. * - * pg_statistic rows are just added normally. This means that - * pg_statistic will probably contain some deleted rows at the - * completion of a vacuum cycle, unless it happens to get vacuumed last. + * We determine the fraction of non-null rows, the average width, the + * most common values, the (estimated) number of distinct values, the + * distribution histogram, and the correlation of physical to logical order. * - * To keep things simple, we punt for pg_statistic, and don't try - * to compute or store rows for pg_statistic itself in pg_statistic. - * This could possibly be made to work, but it's not worth the trouble. + * The desired stats can be determined fairly easily after sorting the + * data values into order. */ static void -update_attstats(Oid relid, int natts, VacAttrStats *vacattrstats) +compute_scalar_stats(VacAttrStats *stats, + TupleDesc tupDesc, long totalrows, + HeapTuple *rows, int numrows) { - Relation ad, - sd; - HeapScanDesc scan; - HeapTuple atup, - stup; - ScanKeyData askey; - Form_pg_attribute attp; - - ad = heap_openr(AttributeRelationName, RowExclusiveLock); - sd = heap_openr(StatisticRelationName, RowExclusiveLock); - - /* Find pg_attribute rows for this relation */ - ScanKeyEntryInitialize(&askey, 0, Anum_pg_attribute_attrelid, - F_INT4EQ, relid); - - scan = heap_beginscan(ad, false, SnapshotNow, 1, &askey); - - while (HeapTupleIsValid(atup = heap_getnext(scan, 0))) + int i; + int null_cnt = 0; + int nonnull_cnt = 0; + int toowide_cnt = 0; + double total_width = 0; + bool is_varlena = (!stats->attr->attbyval && + stats->attr->attlen == -1); + double corr_xysum; + RegProcedure cmpFn; + SortFunctionKind cmpFnKind; + FmgrInfo f_cmpfn; + ScalarItem *values; + int values_cnt = 0; + int *tupnoLink; + ScalarMCVItem *track; + int track_cnt = 0; + int num_mcv = stats->attr->attstattarget; + + values = (ScalarItem *) palloc(numrows * sizeof(ScalarItem)); + tupnoLink = (int *) palloc(numrows * sizeof(int)); + track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem)); + + SelectSortFunction(stats->ltopr, &cmpFn, &cmpFnKind); + fmgr_info(cmpFn, &f_cmpfn); + + /* Initial scan to find sortable values */ + for (i = 0; i < numrows; i++) { - int i; - VacAttrStats *stats; + HeapTuple tuple = rows[i]; + Datum value; + bool isnull; - attp = (Form_pg_attribute) GETSTRUCT(atup); - if (attp->attnum <= 0) /* skip system attributes for now */ - continue; + value = heap_getattr(tuple, stats->attnum, tupDesc, &isnull); - for (i = 0; i < natts; i++) + /* Check for null/nonnull */ + if (isnull) { - if (attp->attnum == vacattrstats[i].attr->attnum) - break; + null_cnt++; + continue; } - if (i >= natts) - continue; /* skip attr if no stats collected */ - stats = &(vacattrstats[i]); + nonnull_cnt++; - if (VacAttrStatsEqValid(stats)) + /* + * If it's a varlena field, add up widths for average width + * calculation. Note that if the value is toasted, we + * use the toasted width. We don't bother with this calculation + * if it's a fixed-width type. + */ + if (is_varlena) { - float4 selratio; /* average ratio of rows selected - * for a random constant */ - - /* Compute dispersion */ - if (stats->nonnull_cnt == 0 && stats->null_cnt == 0) + total_width += VARSIZE(DatumGetPointer(value)); + /* + * If the value is toasted, we want to detoast it just once to + * avoid repeated detoastings and resultant excess memory usage + * during the comparisons. Also, check to see if the value is + * excessively wide, and if so don't detoast at all --- just + * ignore the value. + */ + if (toast_raw_datum_size(value) > WIDTH_THRESHOLD) { - - /* - * empty relation, so put a dummy value in attdispersion - */ - selratio = 0; + toowide_cnt++; + continue; } - else if (stats->null_cnt <= 1 && stats->best_cnt == 1) - { + value = PointerGetDatum(PG_DETOAST_DATUM(value)); + } - /* - * looks like we have a unique-key attribute --- flag this - * with special -1.0 flag value. - * - * The correct dispersion is 1.0/numberOfRows, but since the - * relation row count can get updated without recomputing - * dispersion, we want to store a "symbolic" value and - * figure 1.0/numberOfRows on the fly. - */ - selratio = -1; - } - else + /* Add it to the list to be sorted */ + values[values_cnt].value = value; + values[values_cnt].tupno = values_cnt; + tupnoLink[values_cnt] = values_cnt; + values_cnt++; + } + + /* We can only compute valid stats if we found some sortable values. */ + if (values_cnt > 0) + { + int ndistinct, /* # distinct values in sample */ + nmultiple, /* # that appear multiple times */ + num_hist, + dups_cnt; + int slot_idx = 0; + + /* Sort the collected values */ + datumCmpFn = &f_cmpfn; + datumCmpFnKind = cmpFnKind; + datumCmpTupnoLink = tupnoLink; + qsort((void *) values, values_cnt, + sizeof(ScalarItem), compare_scalars); + + /* + * Now scan the values in order, find the most common ones, + * and also accumulate ordering-correlation statistics. + * + * To determine which are most common, we first have to count the + * number of duplicates of each value. The duplicates are adjacent + * in the sorted list, so a brute-force approach is to compare + * successive datum values until we find two that are not equal. + * However, that requires N-1 invocations of the datum comparison + * routine, which are completely redundant with work that was done + * during the sort. (The sort algorithm must at some point have + * compared each pair of items that are adjacent in the sorted order; + * otherwise it could not know that it's ordered the pair correctly.) + * We exploit this by having compare_scalars remember the highest + * tupno index that each ScalarItem has been found equal to. At the + * end of the sort, a ScalarItem's tupnoLink will still point to + * itself if and only if it is the last item of its group of + * duplicates (since the group will be ordered by tupno). + */ + corr_xysum = 0; + ndistinct = 0; + nmultiple = 0; + dups_cnt = 0; + for (i = 0; i < values_cnt; i++) + { + int tupno = values[i].tupno; + + corr_xysum += (double) i * (double) tupno; + dups_cnt++; + if (tupnoLink[tupno] == tupno) { - if (VacAttrStatsLtGtValid(stats) && - stats->min_cnt + stats->max_cnt == stats->nonnull_cnt) + /* Reached end of duplicates of this value */ + ndistinct++; + if (dups_cnt > 1) { + nmultiple++; + if (track_cnt < num_mcv || + dups_cnt > track[track_cnt-1].count) + { + /* + * Found a new item for the mcv list; find its + * position, bubbling down old items if needed. + * Loop invariant is that j points at an empty/ + * replaceable slot. + */ + int j; + + if (track_cnt < num_mcv) + track_cnt++; + for (j = track_cnt-1; j > 0; j--) + { + if (dups_cnt <= track[j-1].count) + break; + track[j].count = track[j-1].count; + track[j].first = track[j-1].first; + } + track[j].count = dups_cnt; + track[j].first = i + 1 - dups_cnt; + } + } + dups_cnt = 0; + } + } - /* - * exact result when there are just 1 or 2 values... - */ - double min_cnt_d = stats->min_cnt, - max_cnt_d = stats->max_cnt, - null_cnt_d = stats->null_cnt; - double total = ((double) stats->nonnull_cnt) + null_cnt_d; + stats->stats_valid = true; + /* Do the simple null-frac and width stats */ + stats->stanullfrac = (double) null_cnt / (double) numrows; + if (is_varlena) + stats->stawidth = total_width / (double) nonnull_cnt; + else + stats->stawidth = stats->attrtype->typlen; - selratio = (min_cnt_d * min_cnt_d + max_cnt_d * max_cnt_d + null_cnt_d * null_cnt_d) / (total * total); - } - else - { - double most = (double) (stats->best_cnt > stats->null_cnt ? stats->best_cnt : stats->null_cnt); - double total = ((double) stats->nonnull_cnt) + ((double) stats->null_cnt); + if (nmultiple == 0) + { + /* If we found no repeated values, assume it's a unique column */ + stats->stadistinct = -1.0; + } + else if (toowide_cnt == 0 && nmultiple == ndistinct) + { + /* + * Every value in the sample appeared more than once. Assume the + * column has just these values. + */ + stats->stadistinct = ndistinct; + } + else + { + /*---------- + * Estimate the number of distinct values using the estimator + * proposed by Chaudhuri et al (see citation above). This is + * sqrt(n/r) * max(f1,1) + f2 + f3 + ... + * where fk is the number of distinct values that occurred + * exactly k times in our sample of r rows (from a total of n). + * Overwidth values are assumed to have been distinct. + *---------- + */ + int f1 = ndistinct - nmultiple + toowide_cnt; + double term1; - /* - * we assume count of other values are 20% of best - * count in table - */ - selratio = (most * most + 0.20 * most * (total - most)) / (total * total); - } - /* Make sure calculated values are in-range */ - if (selratio < 0.0) - selratio = 0.0; - else if (selratio > 1.0) - selratio = 1.0; + if (f1 < 1) + f1 = 1; + term1 = sqrt((double) totalrows / (double) numrows) * f1; + stats->stadistinct = floor(term1 + nmultiple + 0.5); + } + + /* + * If we estimated the number of distinct values at more than 10% + * of the total row count (a very arbitrary limit), then assume + * that stadistinct should scale with the row count rather than be + * a fixed value. + */ + if (stats->stadistinct > 0.1 * totalrows) + stats->stadistinct = - (stats->stadistinct / totalrows); + + /* Generate an MCV slot entry, only if we found multiples */ + if (nmultiple < num_mcv) + num_mcv = nmultiple; + Assert(track_cnt >= num_mcv); + if (num_mcv > 0) + { + MemoryContext old_context; + Datum *mcv_values; + float4 *mcv_freqs; + + /* Must copy the target values into TransactionCommandContext */ + old_context = MemoryContextSwitchTo(TransactionCommandContext); + mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum)); + mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4)); + for (i = 0; i < num_mcv; i++) + { + mcv_values[i] = datumCopy(values[track[i].first].value, + stats->attr->attbyval, + stats->attr->attlen); + mcv_freqs[i] = (double) track[i].count / (double) numrows; } + MemoryContextSwitchTo(old_context); + + stats->stakind[slot_idx] = STATISTIC_KIND_MCV; + stats->staop[slot_idx] = stats->eqopr; + stats->stanumbers[slot_idx] = mcv_freqs; + stats->numnumbers[slot_idx] = num_mcv; + stats->stavalues[slot_idx] = mcv_values; + stats->numvalues[slot_idx] = num_mcv; + slot_idx++; + } - /* overwrite the existing statistics in the tuple */ - attp->attdispersion = selratio; + /* + * Generate a histogram slot entry if there are at least two + * distinct values not accounted for in the MCV list. (This + * ensures the histogram won't collapse to empty or a singleton.) + */ + num_hist = ndistinct - num_mcv; + if (num_hist > stats->attr->attstattarget) + num_hist = stats->attr->attstattarget + 1; + if (num_hist >= 2) + { + MemoryContext old_context; + Datum *hist_values; + int nvals; - /* invalidate the tuple in the cache and write the buffer */ - RelationInvalidateHeapTuple(ad, atup); - WriteNoReleaseBuffer(scan->rs_cbuf); + /* Sort the MCV items into position order to speed next loop */ + qsort((void *) track, num_mcv, + sizeof(ScalarMCVItem), compare_mcvs); /* - * Create pg_statistic tuples for the relation, if we have - * gathered the right data. del_stats() previously deleted - * all the pg_statistic tuples for the rel, so we just have to - * insert new ones here. + * Collapse out the MCV items from the values[] array. * - * Note analyze_rel() has seen to it that we won't come here when - * vacuuming pg_statistic itself. + * Note we destroy the values[] array here... but we don't need + * it for anything more. We do, however, still need values_cnt. */ - if (VacAttrStatsLtGtValid(stats) && stats->initialized) + if (num_mcv > 0) { - float4 nullratio; - float4 bestratio; - FmgrInfo out_function; - char *out_string; - double best_cnt_d = stats->best_cnt, - null_cnt_d = stats->null_cnt, - nonnull_cnt_d = stats->nonnull_cnt; /* prevent overflow */ - Datum values[Natts_pg_statistic]; - char nulls[Natts_pg_statistic]; - Relation irelations[Num_pg_statistic_indices]; + int src, + dest; + int j; - nullratio = null_cnt_d / (nonnull_cnt_d + null_cnt_d); - bestratio = best_cnt_d / (nonnull_cnt_d + null_cnt_d); - - fmgr_info(stats->outfunc, &out_function); + src = dest = 0; + j = 0; /* index of next interesting MCV item */ + while (src < values_cnt) + { + int ncopy; + + if (j < num_mcv) + { + int first = track[j].first; + + if (src >= first) + { + /* advance past this MCV item */ + src = first + track[j].count; + j++; + continue; + } + ncopy = first - src; + } + else + { + ncopy = values_cnt - src; + } + memmove(&values[dest], &values[src], + ncopy * sizeof(ScalarItem)); + src += ncopy; + dest += ncopy; + } + nvals = dest; + } + else + nvals = values_cnt; + Assert(nvals >= num_hist); - for (i = 0; i < Natts_pg_statistic; ++i) - nulls[i] = ' '; + /* Must copy the target values into TransactionCommandContext */ + old_context = MemoryContextSwitchTo(TransactionCommandContext); + hist_values = (Datum *) palloc(num_hist * sizeof(Datum)); + for (i = 0; i < num_hist; i++) + { + int pos; - /* - * initialize values[] - */ - i = 0; - values[i++] = ObjectIdGetDatum(relid); /* starelid */ - values[i++] = Int16GetDatum(attp->attnum); /* staattnum */ - values[i++] = ObjectIdGetDatum(stats->op_cmplt); /* staop */ - values[i++] = Float4GetDatum(nullratio); /* stanullfrac */ - values[i++] = Float4GetDatum(bestratio); /* stacommonfrac */ - out_string = DatumGetCString(FunctionCall3(&out_function, - stats->best, - ObjectIdGetDatum(stats->typelem), - Int32GetDatum(stats->attr->atttypmod))); - values[i++] = DirectFunctionCall1(textin, /* stacommonval */ - CStringGetDatum(out_string)); - pfree(out_string); - out_string = DatumGetCString(FunctionCall3(&out_function, - stats->min, - ObjectIdGetDatum(stats->typelem), - Int32GetDatum(stats->attr->atttypmod))); - values[i++] = DirectFunctionCall1(textin, /* staloval */ - CStringGetDatum(out_string)); - pfree(out_string); - out_string = DatumGetCString(FunctionCall3(&out_function, - stats->max, - ObjectIdGetDatum(stats->typelem), - Int32GetDatum(stats->attr->atttypmod))); - values[i++] = DirectFunctionCall1(textin, /* stahival */ - CStringGetDatum(out_string)); - pfree(out_string); - - stup = heap_formtuple(sd->rd_att, values, nulls); - - /* store tuple and update indexes too */ - heap_insert(sd, stup); - - CatalogOpenIndices(Num_pg_statistic_indices, Name_pg_statistic_indices, irelations); - CatalogIndexInsert(irelations, Num_pg_statistic_indices, sd, stup); - CatalogCloseIndices(Num_pg_statistic_indices, irelations); - - /* release allocated space */ - pfree(DatumGetPointer(values[Anum_pg_statistic_stacommonval - 1])); - pfree(DatumGetPointer(values[Anum_pg_statistic_staloval - 1])); - pfree(DatumGetPointer(values[Anum_pg_statistic_stahival - 1])); - heap_freetuple(stup); + pos = (i * (nvals - 1)) / (num_hist - 1); + hist_values[i] = datumCopy(values[pos].value, + stats->attr->attbyval, + stats->attr->attlen); } + MemoryContextSwitchTo(old_context); + + stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM; + stats->staop[slot_idx] = stats->ltopr; + stats->stavalues[slot_idx] = hist_values; + stats->numvalues[slot_idx] = num_hist; + slot_idx++; + } + + /* Generate a correlation entry if there are multiple values */ + if (values_cnt > 1) + { + MemoryContext old_context; + float4 *corrs; + double corr_xsum, + corr_x2sum; + + /* Must copy the target values into TransactionCommandContext */ + old_context = MemoryContextSwitchTo(TransactionCommandContext); + corrs = (float4 *) palloc(sizeof(float4)); + MemoryContextSwitchTo(old_context); + + /*---------- + * Since we know the x and y value sets are both + * 0, 1, ..., values_cnt-1 + * we have sum(x) = sum(y) = + * (values_cnt-1)*values_cnt / 2 + * and sum(x^2) = sum(y^2) = + * (values_cnt-1)*values_cnt*(2*values_cnt-1) / 6. + *---------- + */ + corr_xsum = (double) (values_cnt-1) * (double) values_cnt / 2.0; + corr_x2sum = (double) (values_cnt-1) * (double) values_cnt * + (double) (2*values_cnt-1) / 6.0; + /* And the correlation coefficient reduces to */ + corrs[0] = (values_cnt * corr_xysum - corr_xsum * corr_xsum) / + (values_cnt * corr_x2sum - corr_xsum * corr_xsum); + + stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION; + stats->staop[slot_idx] = stats->ltopr; + stats->stanumbers[slot_idx] = corrs; + stats->numnumbers[slot_idx] = 1; + slot_idx++; } } - heap_endscan(scan); - /* close rels, but hold locks till upcoming commit */ - heap_close(ad, NoLock); - heap_close(sd, NoLock); + + /* We don't need to bother cleaning up any of our temporary palloc's */ } /* - * del_stats() -- delete pg_statistic rows for a relation + * qsort comparator for sorting ScalarItems * - * If a list of attribute numbers is given, only zap stats for those attrs. + * Aside from sorting the items, we update the datumCmpTupnoLink[] array + * whenever two ScalarItems are found to contain equal datums. The array + * is indexed by tupno; for each ScalarItem, it contains the highest + * tupno that that item's datum has been found to be equal to. This allows + * us to avoid additional comparisons in compute_scalar_stats(). */ -static void -del_stats(Oid relid, int attcnt, int *attnums) +static int +compare_scalars(const void *a, const void *b) { - Relation pgstatistic; - HeapScanDesc scan; - HeapTuple tuple; - ScanKeyData key; + Datum da = ((ScalarItem *) a)->value; + int ta = ((ScalarItem *) a)->tupno; + Datum db = ((ScalarItem *) b)->value; + int tb = ((ScalarItem *) b)->tupno; - pgstatistic = heap_openr(StatisticRelationName, RowExclusiveLock); + if (datumCmpFnKind == SORTFUNC_LT) + { + if (DatumGetBool(FunctionCall2(datumCmpFn, da, db))) + return -1; /* a < b */ + if (DatumGetBool(FunctionCall2(datumCmpFn, db, da))) + return 1; /* a > b */ + } + else + { + /* sort function is CMP or REVCMP */ + int32 compare; - ScanKeyEntryInitialize(&key, 0x0, Anum_pg_statistic_starelid, - F_OIDEQ, ObjectIdGetDatum(relid)); - scan = heap_beginscan(pgstatistic, false, SnapshotNow, 1, &key); + compare = DatumGetInt32(FunctionCall2(datumCmpFn, da, db)); + if (compare != 0) + { + if (datumCmpFnKind == SORTFUNC_REVCMP) + compare = -compare; + return compare; + } + } - while (HeapTupleIsValid(tuple = heap_getnext(scan, 0))) + /* + * The two datums are equal, so update datumCmpTupnoLink[]. + */ + if (datumCmpTupnoLink[ta] < tb) + datumCmpTupnoLink[ta] = tb; + if (datumCmpTupnoLink[tb] < ta) + datumCmpTupnoLink[tb] = ta; + + /* + * For equal datums, sort by tupno + */ + return ta - tb; +} + +/* + * qsort comparator for sorting ScalarMCVItems by position + */ +static int +compare_mcvs(const void *a, const void *b) +{ + int da = ((ScalarMCVItem *) a)->first; + int db = ((ScalarMCVItem *) b)->first; + + return da - db; +} + + +/* + * update_attstats() -- update attribute statistics for one relation + * + * Statistics are stored in several places: the pg_class row for the + * relation has stats about the whole relation, and there is a + * pg_statistic row for each (non-system) attribute that has ever + * been analyzed. The pg_class values are updated by VACUUM, not here. + * + * pg_statistic rows are just added or updated normally. This means + * that pg_statistic will probably contain some deleted rows at the + * completion of a vacuum cycle, unless it happens to get vacuumed last. + * + * To keep things simple, we punt for pg_statistic, and don't try + * to compute or store rows for pg_statistic itself in pg_statistic. + * This could possibly be made to work, but it's not worth the trouble. + * Note analyze_rel() has seen to it that we won't come here when + * vacuuming pg_statistic itself. + */ +static void +update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats) +{ + Relation sd; + int attno; + + /* + * We use an ExclusiveLock on pg_statistic to ensure that only one + * backend is writing it at a time --- without that, we might have to + * deal with concurrent updates here, and it's not worth the trouble. + */ + sd = heap_openr(StatisticRelationName, ExclusiveLock); + + for (attno = 0; attno < natts; attno++) { - if (attcnt > 0) + VacAttrStats *stats = vacattrstats[attno]; + FmgrInfo out_function; + HeapTuple stup, + oldtup; + int i, k, n; + Datum values[Natts_pg_statistic]; + char nulls[Natts_pg_statistic]; + char replaces[Natts_pg_statistic]; + Relation irelations[Num_pg_statistic_indices]; + + /* Ignore attr if we weren't able to collect stats */ + if (!stats->stats_valid) + continue; + + fmgr_info(stats->attrtype->typoutput, &out_function); + + /* + * Construct a new pg_statistic tuple + */ + for (i = 0; i < Natts_pg_statistic; ++i) { - Form_pg_statistic pgs = (Form_pg_statistic) GETSTRUCT(tuple); - int i; + nulls[i] = ' '; + replaces[i] = 'r'; + } - for (i = 0; i < attcnt; i++) + i = 0; + values[i++] = ObjectIdGetDatum(relid); /* starelid */ + values[i++] = Int16GetDatum(stats->attnum); /* staattnum */ + values[i++] = Float4GetDatum(stats->stanullfrac); /* stanullfrac */ + values[i++] = Int32GetDatum(stats->stawidth); /* stawidth */ + values[i++] = Float4GetDatum(stats->stadistinct); /* stadistinct */ + for (k = 0; k < STATISTIC_NUM_SLOTS; k++) + { + values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */ + } + for (k = 0; k < STATISTIC_NUM_SLOTS; k++) + { + values[i++] = ObjectIdGetDatum(stats->staop[k]); /* staopN */ + } + for (k = 0; k < STATISTIC_NUM_SLOTS; k++) + { + int nnum = stats->numnumbers[k]; + + if (nnum > 0) { - if (pgs->staattnum == attnums[i] + 1) - break; + Datum *numdatums = (Datum *) palloc(nnum * sizeof(Datum)); + ArrayType *arry; + + for (n = 0; n < nnum; n++) + numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]); + /* XXX knows more than it should about type float4: */ + arry = construct_array(numdatums, nnum, + false, sizeof(float4), 'i'); + values[i++] = PointerGetDatum(arry); /* stanumbersN */ + } + else + { + nulls[i] = 'n'; + values[i++] = (Datum) 0; } - if (i >= attcnt) - continue; /* don't delete it */ } - simple_heap_delete(pgstatistic, &tuple->t_self); - } + for (k = 0; k < STATISTIC_NUM_SLOTS; k++) + { + int ntxt = stats->numvalues[k]; - heap_endscan(scan); + if (ntxt > 0) + { + Datum *txtdatums = (Datum *) palloc(ntxt * sizeof(Datum)); + ArrayType *arry; - /* - * Close rel, but *keep* lock; we will need to reacquire it later, so - * there's a possibility of deadlock against another VACUUM process if - * we let go now. Keeping the lock shouldn't delay any common - * operation other than an attempted VACUUM of pg_statistic itself. - */ - heap_close(pgstatistic, NoLock); + for (n = 0; n < ntxt; n++) + { + /* + * Convert data values to a text string to be inserted + * into the text array. + */ + Datum stringdatum; + + stringdatum = + FunctionCall3(&out_function, + stats->stavalues[k][n], + ObjectIdGetDatum(stats->attrtype->typelem), + Int32GetDatum(stats->attr->atttypmod)); + txtdatums[n] = DirectFunctionCall1(textin, stringdatum); + pfree(DatumGetPointer(stringdatum)); + } + /* XXX knows more than it should about type text: */ + arry = construct_array(txtdatums, ntxt, + false, -1, 'i'); + values[i++] = PointerGetDatum(arry); /* stavaluesN */ + } + else + { + nulls[i] = 'n'; + values[i++] = (Datum) 0; + } + } + + /* Is there already a pg_statistic tuple for this attribute? */ + oldtup = SearchSysCache(STATRELATT, + ObjectIdGetDatum(relid), + Int16GetDatum(stats->attnum), + 0, 0); + + if (HeapTupleIsValid(oldtup)) + { + /* Yes, replace it */ + stup = heap_modifytuple(oldtup, + sd, + values, + nulls, + replaces); + ReleaseSysCache(oldtup); + simple_heap_update(sd, &stup->t_self, stup); + } + else + { + /* No, insert new tuple */ + stup = heap_formtuple(sd->rd_att, values, nulls); + heap_insert(sd, stup); + } + + /* update indices too */ + CatalogOpenIndices(Num_pg_statistic_indices, Name_pg_statistic_indices, + irelations); + CatalogIndexInsert(irelations, Num_pg_statistic_indices, sd, stup); + CatalogCloseIndices(Num_pg_statistic_indices, irelations); + + heap_freetuple(stup); + } + + /* close rel, but hold lock till upcoming commit */ + heap_close(sd, NoLock); } diff --git a/src/backend/commands/command.c b/src/backend/commands/command.c index 96d493688e3..13a78f11773 100644 --- a/src/backend/commands/command.c +++ b/src/backend/commands/command.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/Attic/command.c,v 1.125 2001/03/23 04:49:52 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/Attic/command.c,v 1.126 2001/05/07 00:43:17 tgl Exp $ * * NOTES * The PerformAddAttribute() code, like most of the relation @@ -56,6 +56,7 @@ #include "access/genam.h" +static void drop_default(Oid relid, int16 attnum); static bool needs_toast_table(Relation rel); static bool is_relation(char *name); @@ -408,7 +409,7 @@ AlterTableAddColumn(const char *relationName, HeapTuple typeTuple; Form_pg_type tform; char *typename; - int attnelems; + int attndims; if (SearchSysCacheExists(ATTNAME, ObjectIdGetDatum(reltup->t_data->t_oid), @@ -425,11 +426,11 @@ AlterTableAddColumn(const char *relationName, if (colDef->typename->arrayBounds) { - attnelems = length(colDef->typename->arrayBounds); + attndims = length(colDef->typename->arrayBounds); typename = makeArrayTypeName(colDef->typename->name); } else - attnelems = 0; + attndims = 0; typeTuple = SearchSysCache(TYPENAME, PointerGetDatum(typename), @@ -441,12 +442,12 @@ AlterTableAddColumn(const char *relationName, namestrcpy(&(attribute->attname), colDef->colname); attribute->atttypid = typeTuple->t_data->t_oid; attribute->attlen = tform->typlen; - attribute->attdispersion = 0; + attribute->attstattarget = DEFAULT_ATTSTATTARGET; attribute->attcacheoff = -1; attribute->atttypmod = colDef->typename->typmod; attribute->attnum = i; attribute->attbyval = tform->typbyval; - attribute->attnelems = attnelems; + attribute->attndims = attndims; attribute->attisset = (bool) (tform->typtype == 'c'); attribute->attstorage = tform->typstorage; attribute->attalign = tform->typalign; @@ -496,17 +497,13 @@ AlterTableAddColumn(const char *relationName, } - -static void drop_default(Oid relid, int16 attnum); - - /* * ALTER TABLE ALTER COLUMN SET/DROP DEFAULT */ void -AlterTableAlterColumn(const char *relationName, - bool inh, const char *colName, - Node *newDefault) +AlterTableAlterColumnDefault(const char *relationName, + bool inh, const char *colName, + Node *newDefault) { Relation rel; HeapTuple tuple; @@ -551,8 +548,8 @@ AlterTableAlterColumn(const char *relationName, if (childrelid == myrelid) continue; rel = heap_open(childrelid, AccessExclusiveLock); - AlterTableAlterColumn(RelationGetRelationName(rel), - false, colName, newDefault); + AlterTableAlterColumnDefault(RelationGetRelationName(rel), + false, colName, newDefault); heap_close(rel, AccessExclusiveLock); } } @@ -560,7 +557,7 @@ AlterTableAlterColumn(const char *relationName, /* -= now do the thing on this relation =- */ /* reopen the business */ - rel = heap_openr((char *) relationName, AccessExclusiveLock); + rel = heap_openr(relationName, AccessExclusiveLock); /* * get the number of the attribute @@ -647,7 +644,6 @@ AlterTableAlterColumn(const char *relationName, } - static void drop_default(Oid relid, int16 attnum) { @@ -675,6 +671,104 @@ drop_default(Oid relid, int16 attnum) } +/* + * ALTER TABLE ALTER COLUMN SET STATISTICS + */ +void +AlterTableAlterColumnStatistics(const char *relationName, + bool inh, const char *colName, + Node *statsTarget) +{ + Relation rel; + Oid myrelid; + int newtarget; + Relation attrelation; + HeapTuple tuple; + +#ifndef NO_SECURITY + if (!pg_ownercheck(GetUserId(), relationName, RELNAME)) + elog(ERROR, "ALTER TABLE: permission denied"); +#endif + + rel = heap_openr(relationName, AccessExclusiveLock); + if (rel->rd_rel->relkind != RELKIND_RELATION) + elog(ERROR, "ALTER TABLE: relation \"%s\" is not a table", + relationName); + myrelid = RelationGetRelid(rel); + heap_close(rel, NoLock); /* close rel, but keep lock! */ + + /* + * Propagate to children if desired + */ + if (inh) + { + List *child, + *children; + + /* this routine is actually in the planner */ + children = find_all_inheritors(myrelid); + + /* + * find_all_inheritors does the recursive search of the + * inheritance hierarchy, so all we have to do is process all of + * the relids in the list that it returns. + */ + foreach(child, children) + { + Oid childrelid = lfirsti(child); + + if (childrelid == myrelid) + continue; + rel = heap_open(childrelid, AccessExclusiveLock); + AlterTableAlterColumnStatistics(RelationGetRelationName(rel), + false, colName, statsTarget); + heap_close(rel, AccessExclusiveLock); + } + } + + /* -= now do the thing on this relation =- */ + + Assert(IsA(statsTarget, Integer)); + newtarget = intVal(statsTarget); + + /* Limit target to sane range (should we raise an error instead?) */ + if (newtarget < 0) + newtarget = 0; + else if (newtarget > 1000) + newtarget = 1000; + + attrelation = heap_openr(AttributeRelationName, RowExclusiveLock); + + tuple = SearchSysCacheCopy(ATTNAME, + ObjectIdGetDatum(myrelid), + PointerGetDatum(colName), + 0, 0); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "ALTER TABLE: relation \"%s\" has no column \"%s\"", + relationName, colName); + + if (((Form_pg_attribute) GETSTRUCT(tuple))->attnum < 0) + elog(ERROR, "ALTER TABLE: cannot change system attribute \"%s\"", + colName); + + ((Form_pg_attribute) GETSTRUCT(tuple))->attstattarget = newtarget; + + simple_heap_update(attrelation, &tuple->t_self, tuple); + + /* keep system catalog indices current */ + { + Relation irelations[Num_pg_attr_indices]; + + CatalogOpenIndices(Num_pg_attr_indices, Name_pg_attr_indices, irelations); + CatalogIndexInsert(irelations, Num_pg_attr_indices, attrelation, tuple); + CatalogCloseIndices(Num_pg_attr_indices, irelations); + } + + heap_freetuple(tuple); + heap_close(attrelation, RowExclusiveLock); +} + + #ifdef _DROP_COLUMN_HACK__ /* * ALTER TABLE DROP COLUMN trial implementation diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 694d0e8bbc1..9a0dbdc8c8e 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.189 2001/03/25 23:23:58 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.190 2001/05/07 00:43:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -53,25 +53,90 @@ extern XLogRecPtr log_heap_move(Relation reln, Buffer oldbuf, ItemPointerData from, Buffer newbuf, HeapTuple newtup); + +typedef struct VRelListData +{ + Oid vrl_relid; + struct VRelListData *vrl_next; +} VRelListData; + +typedef VRelListData *VRelList; + +typedef struct VacPageData +{ + BlockNumber blkno; /* BlockNumber of this Page */ + Size free; /* FreeSpace on this Page */ + uint16 offsets_used; /* Number of OffNums used by vacuum */ + uint16 offsets_free; /* Number of OffNums free or to be free */ + OffsetNumber offsets[1]; /* Array of its OffNums */ +} VacPageData; + +typedef VacPageData *VacPage; + +typedef struct VacPageListData +{ + int empty_end_pages;/* Number of "empty" end-pages */ + int num_pages; /* Number of pages in pagedesc */ + int num_allocated_pages; /* Number of allocated pages in + * pagedesc */ + VacPage *pagedesc; /* Descriptions of pages */ +} VacPageListData; + +typedef VacPageListData *VacPageList; + +typedef struct VTupleLinkData +{ + ItemPointerData new_tid; + ItemPointerData this_tid; +} VTupleLinkData; + +typedef VTupleLinkData *VTupleLink; + +typedef struct VTupleMoveData +{ + ItemPointerData tid; /* tuple ID */ + VacPage vacpage; /* where to move */ + bool cleanVpd; /* clean vacpage before using */ +} VTupleMoveData; + +typedef VTupleMoveData *VTupleMove; + +typedef struct VRelStats +{ + Oid relid; + long num_pages; + long num_tuples; + Size min_tlen; + Size max_tlen; + bool hasindex; + int num_vtlinks; + VTupleLink vtlinks; +} VRelStats; + + static MemoryContext vac_context = NULL; static int MESSAGE_LEVEL; /* message level */ static TransactionId XmaxRecent; + /* non-export function prototypes */ static void vacuum_init(void); static void vacuum_shutdown(void); -static void vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2); -static VRelList getrels(NameData *VacRelP); +static VRelList getrels(Name VacRelP, const char *stmttype); static void vacuum_rel(Oid relid); -static void scan_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages); -static void repair_frag(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages, int nindices, Relation *Irel); -static void vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacpagelist); +static void scan_heap(VRelStats *vacrelstats, Relation onerel, + VacPageList vacuum_pages, VacPageList fraged_pages); +static void repair_frag(VRelStats *vacrelstats, Relation onerel, + VacPageList vacuum_pages, VacPageList fraged_pages, + int nindices, Relation *Irel); +static void vacuum_heap(VRelStats *vacrelstats, Relation onerel, + VacPageList vacpagelist); static void vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage); -static void vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_tuples); -static void scan_index(Relation indrel, int num_tuples); -static void update_relstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *vacrelstats); +static void vacuum_index(VacPageList vacpagelist, Relation indrel, + long num_tuples, int keep_tuples); +static void scan_index(Relation indrel, long num_tuples); static VacPage tid_reaped(ItemPointer itemptr, VacPageList vacpagelist); static void reap_page(VacPageList vacpagelist, VacPage vacpage); static void vpage_insert(VacPageList vacpagelist, VacPage vpnew); @@ -88,17 +153,17 @@ static bool enough_space(VacPage vacpage, Size len); static char *show_rusage(struct rusage * ru0); +/* + * Primary entry point for VACUUM and ANALYZE commands. + */ void -vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols) +vacuum(VacuumStmt *vacstmt) { + const char *stmttype = vacstmt->vacuum ? "VACUUM" : "ANALYZE"; NameData VacRel; Name VacRelName; - MemoryContext old; - List *le; - List *anal_cols2 = NIL; - - if (anal_cols != NIL && !analyze) - elog(ERROR, "Can't vacuum columns, only tables. You can 'vacuum analyze' columns."); + VRelList vrl, + cur; /* * We cannot run VACUUM inside a user transaction block; if we were @@ -110,9 +175,9 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols) * behavior. */ if (IsTransactionBlock()) - elog(ERROR, "VACUUM cannot run inside a BEGIN/END block"); + elog(ERROR, "%s cannot run inside a BEGIN/END block", stmttype); - if (verbose) + if (vacstmt->verbose) MESSAGE_LEVEL = NOTICE; else MESSAGE_LEVEL = DEBUG; @@ -130,37 +195,36 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols) ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); - /* vacrel gets de-allocated on xact commit, so copy it to safe storage */ - if (vacrel) + /* Convert vacrel, which is just a string, to a Name */ + if (vacstmt->vacrel) { - namestrcpy(&VacRel, vacrel); + namestrcpy(&VacRel, vacstmt->vacrel); VacRelName = &VacRel; } else VacRelName = NULL; - /* must also copy the column list, if any, to safe storage */ - old = MemoryContextSwitchTo(vac_context); - foreach(le, anal_cols) - { - char *col = (char *) lfirst(le); - - anal_cols2 = lappend(anal_cols2, pstrdup(col)); - } - MemoryContextSwitchTo(old); + /* Build list of relations to process (note this lives in vac_context) */ + vrl = getrels(VacRelName, stmttype); /* * Start up the vacuum cleaner. - * - * NOTE: since this commits the current transaction, the memory holding - * any passed-in parameters gets freed here. We must have already - * copied pass-by-reference parameters to safe storage. Don't make me - * fix this again! */ vacuum_init(); - /* vacuum the database */ - vac_vacuum(VacRelName, analyze, anal_cols2); + /* + * Process each selected relation. We are careful to process + * each relation in a separate transaction in order to avoid holding + * too many locks at one time. + */ + for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next) + { + if (vacstmt->vacuum) + vacuum_rel(cur->vrl_relid); + /* analyze separately so locking is minimized */ + if (vacstmt->analyze) + analyze_rel(cur->vrl_relid, vacstmt); + } /* clean up */ vacuum_shutdown(); @@ -187,14 +251,14 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols) * PostgresMain(). */ static void -vacuum_init() +vacuum_init(void) { /* matches the StartTransaction in PostgresMain() */ CommitTransactionCommand(); } static void -vacuum_shutdown() +vacuum_shutdown(void) { /* on entry, we are not in a transaction */ @@ -223,34 +287,10 @@ vacuum_shutdown() } /* - * vac_vacuum() -- vacuum the database. - * - * This routine builds a list of relations to vacuum, and then calls - * code that vacuums them one at a time. We are careful to vacuum each - * relation in a separate transaction in order to avoid holding too many - * locks at one time. + * Build a list of VRelListData nodes for each relation to be processed */ -static void -vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2) -{ - VRelList vrl, - cur; - - /* get list of relations */ - vrl = getrels(VacRelP); - - /* vacuum each heap relation */ - for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next) - { - vacuum_rel(cur->vrl_relid); - /* analyze separately so locking is minimized */ - if (analyze) - analyze_rel(cur->vrl_relid, anal_cols2, MESSAGE_LEVEL); - } -} - static VRelList -getrels(NameData *VacRelP) +getrels(Name VacRelP, const char *stmttype) { Relation rel; TupleDesc tupdesc; @@ -262,12 +302,9 @@ getrels(NameData *VacRelP) char *rname; char rkind; bool n; - bool found = false; ScanKeyData key; - StartTransactionCommand(); - - if (NameStr(*VacRelP)) + if (VacRelP) { /* @@ -287,6 +324,7 @@ getrels(NameData *VacRelP) } else { + /* find all relations listed in pg_class */ ScanKeyEntryInitialize(&key, 0x0, Anum_pg_class_relkind, F_CHAREQ, CharGetDatum('r')); } @@ -300,21 +338,20 @@ getrels(NameData *VacRelP) while (HeapTupleIsValid(tuple = heap_getnext(scan, 0))) { - found = true; - d = heap_getattr(tuple, Anum_pg_class_relname, tupdesc, &n); - rname = (char *) DatumGetPointer(d); + rname = (char *) DatumGetName(d); d = heap_getattr(tuple, Anum_pg_class_relkind, tupdesc, &n); rkind = DatumGetChar(d); if (rkind != RELKIND_RELATION) { - elog(NOTICE, "Vacuum: can not process indices, views and certain system tables"); + elog(NOTICE, "%s: can not process indexes, views or special system tables", + stmttype); continue; } - /* get a relation list entry for this guy */ + /* Make a relation list entry for this guy */ if (vrl == (VRelList) NULL) vrl = cur = (VRelList) MemoryContextAlloc(vac_context, sizeof(VRelListData)); @@ -332,10 +369,8 @@ getrels(NameData *VacRelP) heap_endscan(scan); heap_close(rel, AccessShareLock); - if (!found) - elog(NOTICE, "Vacuum: table not found"); - - CommitTransactionCommand(); + if (vrl == NULL) + elog(NOTICE, "%s: table not found", stmttype); return vrl; } @@ -432,7 +467,8 @@ vacuum_rel(Oid relid) */ vacrelstats = (VRelStats *) palloc(sizeof(VRelStats)); vacrelstats->relid = relid; - vacrelstats->num_pages = vacrelstats->num_tuples = 0; + vacrelstats->num_pages = 0; + vacrelstats->num_tuples = 0; vacrelstats->hasindex = false; GetXmaxRecent(&XmaxRecent); @@ -457,8 +493,8 @@ vacuum_rel(Oid relid) vacrelstats->hasindex = true; else vacrelstats->hasindex = false; -#ifdef NOT_USED +#ifdef NOT_USED /* * reindex in VACUUM is dangerous under WAL. ifdef out until it * becomes safe. @@ -528,9 +564,8 @@ vacuum_rel(Oid relid) heap_close(onerel, NoLock); /* update statistics in pg_class */ - update_relstats(vacrelstats->relid, vacrelstats->num_pages, - vacrelstats->num_tuples, vacrelstats->hasindex, - vacrelstats); + vac_update_relstats(vacrelstats->relid, vacrelstats->num_pages, + vacrelstats->num_tuples, vacrelstats->hasindex); /* * Complete the transaction and free all temporary memory used. @@ -582,8 +617,8 @@ scan_heap(VRelStats *vacrelstats, Relation onerel, char *relname; VacPage vacpage, vp; + long num_tuples; uint32 tups_vacuumed, - num_tuples, nkeep, nunused, ncrash, @@ -913,7 +948,6 @@ scan_heap(VRelStats *vacrelstats, Relation onerel, /* save stats in the rel list for use later */ vacrelstats->num_tuples = num_tuples; vacrelstats->num_pages = nblocks; -/* vacrelstats->natts = attr_cnt;*/ if (num_tuples == 0) min_tlen = max_tlen = 0; vacrelstats->min_tlen = min_tlen; @@ -960,7 +994,7 @@ scan_heap(VRelStats *vacrelstats, Relation onerel, } elog(MESSAGE_LEVEL, "Pages %u: Changed %u, reaped %u, Empty %u, New %u; \ -Tup %u: Vac %u, Keep/VTL %u/%u, Crash %u, UnUsed %u, MinLen %lu, MaxLen %lu; \ +Tup %lu: Vac %u, Keep/VTL %u/%u, Crash %u, UnUsed %u, MinLen %lu, MaxLen %lu; \ Re-using: Free/Avail. Space %lu/%lu; EndEmpty/Avail. Pages %u/%u. %s", nblocks, changed_pages, vacuum_pages->num_pages, empty_pages, new_pages, num_tuples, tups_vacuumed, @@ -2009,7 +2043,7 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages) { Buffer buf; VacPage *vacpage; - int nblocks; + long nblocks; int i; nblocks = vacuum_pages->num_pages; @@ -2044,7 +2078,7 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages) /* truncate relation if there are some empty end-pages */ if (vacuum_pages->empty_end_pages > 0) { - elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u.", + elog(MESSAGE_LEVEL, "Rel %s: Pages: %lu --> %lu.", RelationGetRelationName(onerel), vacrelstats->num_pages, nblocks); nblocks = smgrtruncate(DEFAULT_SMGR, onerel, nblocks); @@ -2094,11 +2128,11 @@ vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage) * */ static void -scan_index(Relation indrel, int num_tuples) +scan_index(Relation indrel, long num_tuples) { RetrieveIndexResult res; IndexScanDesc iscan; - int nitups; + long nitups; int nipages; struct rusage ru0; @@ -2119,14 +2153,14 @@ scan_index(Relation indrel, int num_tuples) /* now update statistics in pg_class */ nipages = RelationGetNumberOfBlocks(indrel); - update_relstats(RelationGetRelid(indrel), nipages, nitups, false, NULL); + vac_update_relstats(RelationGetRelid(indrel), nipages, nitups, false); - elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u. %s", + elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %lu. %s", RelationGetRelationName(indrel), nipages, nitups, show_rusage(&ru0)); if (nitups != num_tuples) - elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\ + elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%lu) IS NOT THE SAME AS HEAP' (%lu).\ \n\tRecreate the index.", RelationGetRelationName(indrel), nitups, num_tuples); @@ -2145,13 +2179,14 @@ scan_index(Relation indrel, int num_tuples) * pg_class. */ static void -vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_tuples) +vacuum_index(VacPageList vacpagelist, Relation indrel, + long num_tuples, int keep_tuples) { RetrieveIndexResult res; IndexScanDesc iscan; ItemPointer heapptr; int tups_vacuumed; - int num_index_tuples; + long num_index_tuples; int num_pages; VacPage vp; struct rusage ru0; @@ -2196,15 +2231,16 @@ vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_ /* now update statistics in pg_class */ num_pages = RelationGetNumberOfBlocks(indrel); - update_relstats(RelationGetRelid(indrel), num_pages, num_index_tuples, false, NULL); + vac_update_relstats(RelationGetRelid(indrel), + num_pages, num_index_tuples, false); - elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u: Deleted %u. %s", + elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %lu: Deleted %u. %s", RelationGetRelationName(indrel), num_pages, num_index_tuples - keep_tuples, tups_vacuumed, show_rusage(&ru0)); if (num_index_tuples != num_tuples + keep_tuples) - elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\ + elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%lu) IS NOT THE SAME AS HEAP' (%lu).\ \n\tRecreate the index.", RelationGetRelationName(indrel), num_index_tuples, num_tuples); @@ -2255,7 +2291,7 @@ tid_reaped(ItemPointer itemptr, VacPageList vacpagelist) } /* - * update_relstats() -- update statistics for one relation + * vac_update_relstats() -- update statistics for one relation * * Update the whole-relation statistics that are kept in its pg_class * row. There are additional stats that will be updated if we are @@ -2268,13 +2304,12 @@ tid_reaped(ItemPointer itemptr, VacPageList vacpagelist) * we updated these tuples in the usual way, vacuuming pg_class itself * wouldn't work very well --- by the time we got done with a vacuum * cycle, most of the tuples in pg_class would've been obsoleted. - * Updating pg_class's own statistics would be especially tricky. * Of course, this only works for fixed-size never-null columns, but * these are. */ -static void -update_relstats(Oid relid, int num_pages, int num_tuples, bool hasindex, - VRelStats *vacrelstats) +void +vac_update_relstats(Oid relid, long num_pages, double num_tuples, + bool hasindex) { Relation rd; HeapTupleData rtup; diff --git a/src/backend/executor/nodeSort.c b/src/backend/executor/nodeSort.c index 12c6f82a8b2..e0543a28109 100644 --- a/src/backend/executor/nodeSort.c +++ b/src/backend/executor/nodeSort.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeSort.c,v 1.32 2001/03/22 06:16:13 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeSort.c,v 1.33 2001/05/07 00:43:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -20,24 +20,24 @@ #include "utils/tuplesort.h" /* ---------------------------------------------------------------- - * FormSortKeys(node) + * ExtractSortKeys * - * Forms the structure containing information used to sort the relation. + * Extract the sorting key information from the plan node. * - * Returns an array of ScanKeyData. + * Returns two palloc'd arrays, one of sort operator OIDs and + * one of attribute numbers. * ---------------------------------------------------------------- */ -static ScanKey -FormSortKeys(Sort *sortnode) +static void +ExtractSortKeys(Sort *sortnode, + Oid **sortOperators, + AttrNumber **attNums) { - ScanKey sortkeys; List *targetList; - List *tl; int keycount; - Resdom *resdom; - AttrNumber resno; - Index reskey; - Oid reskeyop; + Oid *sortOps; + AttrNumber *attNos; + List *tl; /* * get information from the node @@ -46,36 +46,33 @@ FormSortKeys(Sort *sortnode) keycount = sortnode->keycount; /* - * first allocate space for scan keys + * first allocate space for results */ if (keycount <= 0) - elog(ERROR, "FormSortKeys: keycount <= 0"); - sortkeys = (ScanKey) palloc(keycount * sizeof(ScanKeyData)); - MemSet((char *) sortkeys, 0, keycount * sizeof(ScanKeyData)); + elog(ERROR, "ExtractSortKeys: keycount <= 0"); + sortOps = (Oid *) palloc(keycount * sizeof(Oid)); + MemSet(sortOps, 0, keycount * sizeof(Oid)); + *sortOperators = sortOps; + attNos = (AttrNumber *) palloc(keycount * sizeof(AttrNumber)); + MemSet(attNos, 0, keycount * sizeof(AttrNumber)); + *attNums = attNos; /* - * form each scan key from the resdom info in the target list + * extract info from the resdom nodes in the target list */ foreach(tl, targetList) { TargetEntry *target = (TargetEntry *) lfirst(tl); - - resdom = target->resdom; - resno = resdom->resno; - reskey = resdom->reskey; - reskeyop = resdom->reskeyop; + Resdom *resdom = target->resdom; + Index reskey = resdom->reskey; if (reskey > 0) /* ignore TLEs that are not sort keys */ { - ScanKeyEntryInitialize(&sortkeys[reskey - 1], - 0x0, - resno, - (RegProcedure) reskeyop, - (Datum) 0); + Assert(reskey <= keycount); + sortOps[reskey - 1] = resdom->reskeyop; + attNos[reskey - 1] = resdom->resno; } } - - return sortkeys; } /* ---------------------------------------------------------------- @@ -124,8 +121,8 @@ ExecSort(Sort *node) { Plan *outerNode; TupleDesc tupDesc; - int keycount; - ScanKey sortkeys; + Oid *sortOperators; + AttrNumber *attNums; SO1_printf("ExecSort: %s\n", "sorting subplan"); @@ -145,14 +142,17 @@ ExecSort(Sort *node) outerNode = outerPlan((Plan *) node); tupDesc = ExecGetTupType(outerNode); - keycount = node->keycount; - sortkeys = (ScanKey) sortstate->sort_Keys; - tuplesortstate = tuplesort_begin_heap(tupDesc, keycount, sortkeys, - true /* randomAccess */ ); + ExtractSortKeys(node, &sortOperators, &attNums); + tuplesortstate = tuplesort_begin_heap(tupDesc, node->keycount, + sortOperators, attNums, + true /* randomAccess */ ); sortstate->tuplesortstate = (void *) tuplesortstate; + pfree(sortOperators); + pfree(attNums); + /* * Scan the subplan and feed all the tuples to tuplesort. */ @@ -230,7 +230,6 @@ ExecInitSort(Sort *node, EState *estate, Plan *parent) */ sortstate = makeNode(SortState); sortstate->sort_Done = false; - sortstate->sort_Keys = NULL; sortstate->tuplesortstate = NULL; node->sortstate = sortstate; @@ -259,11 +258,6 @@ ExecInitSort(Sort *node, EState *estate, Plan *parent) ExecInitNode(outerPlan, estate, (Plan *) node); /* - * initialize sortstate information - */ - sortstate->sort_Keys = FormSortKeys(node); - - /* * initialize tuple type. no need to initialize projection info * because this node doesn't do projections. */ @@ -321,9 +315,6 @@ ExecEndSort(Sort *node) tuplesort_end((Tuplesortstate *) sortstate->tuplesortstate); sortstate->tuplesortstate = NULL; - if (sortstate->sort_Keys != NULL) - pfree(sortstate->sort_Keys); - pfree(sortstate); node->sortstate = NULL; diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index ad50630931e..ee5a803b802 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -15,7 +15,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.140 2001/03/22 06:16:14 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.141 2001/05/07 00:43:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1378,8 +1378,8 @@ _copyRestrictInfo(RestrictInfo *from) newnode->left_pathkey = NIL; newnode->right_pathkey = NIL; newnode->hashjoinoperator = from->hashjoinoperator; - newnode->left_dispersion = from->left_dispersion; - newnode->right_dispersion = from->right_dispersion; + newnode->left_bucketsize = from->left_bucketsize; + newnode->right_bucketsize = from->right_bucketsize; return newnode; } @@ -2209,11 +2209,12 @@ _copyVacuumStmt(VacuumStmt *from) { VacuumStmt *newnode = makeNode(VacuumStmt); - newnode->verbose = from->verbose; + newnode->vacuum = from->vacuum; newnode->analyze = from->analyze; + newnode->verbose = from->verbose; if (from->vacrel) newnode->vacrel = pstrdup(from->vacrel); - Node_Copy(from, newnode, va_spec); + Node_Copy(from, newnode, va_cols); return newnode; } diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 06ee63bbacd..284a534aa96 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -20,7 +20,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.88 2001/03/22 03:59:31 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.89 2001/05/07 00:43:19 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -516,7 +516,7 @@ _equalRestrictInfo(RestrictInfo *a, RestrictInfo *b) return false; /* - * ignore eval_cost, left/right_pathkey, and left/right_dispersion, + * ignore eval_cost, left/right_pathkey, and left/right_bucketsize, * since they may not be set yet, and should be derivable from the * clause anyway */ @@ -1113,13 +1113,15 @@ _equalDropdbStmt(DropdbStmt *a, DropdbStmt *b) static bool _equalVacuumStmt(VacuumStmt *a, VacuumStmt *b) { - if (a->verbose != b->verbose) + if (a->vacuum != b->vacuum) return false; if (a->analyze != b->analyze) return false; + if (a->verbose != b->verbose) + return false; if (!equalstr(a->vacrel, b->vacrel)) return false; - if (!equal(a->va_spec, b->va_spec)) + if (!equal(a->va_cols, b->va_cols)) return false; return true; diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 9a071e7a250..4c0c1b03ef5 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.107 2001/03/22 03:59:32 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.108 2001/05/07 00:43:19 tgl Exp $ * * NOTES * Most of the read functions for plan nodes are tested. (In fact, they @@ -1874,11 +1874,11 @@ _readRestrictInfo(void) /* eval_cost is not part of saved representation; compute on first use */ local_node->eval_cost = -1; - /* ditto for cached pathkeys and dispersion */ + /* ditto for cached pathkeys and bucketsize */ local_node->left_pathkey = NIL; local_node->right_pathkey = NIL; - local_node->left_dispersion = -1; - local_node->right_dispersion = -1; + local_node->left_bucketsize = -1; + local_node->right_bucketsize = -1; return local_node; } diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index c52af72a16b..bdfbbb18186 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -41,7 +41,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.70 2001/04/25 22:04:37 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.71 2001/05/07 00:43:20 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -50,11 +50,15 @@ #include <math.h> +#include "catalog/pg_statistic.h" #include "executor/nodeHash.h" #include "miscadmin.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" +#include "optimizer/pathnode.h" +#include "parser/parsetree.h" #include "utils/lsyscache.h" +#include "utils/syscache.h" /* @@ -573,7 +577,7 @@ cost_mergejoin(Path *path, * 'outer_path' is the path for the outer relation * 'inner_path' is the path for the inner relation * 'restrictlist' are the RestrictInfo nodes to be applied at the join - * 'innerdispersion' is an estimate of the dispersion statistic + * 'innerbucketsize' is an estimate of the bucketsize statistic * for the inner hash key. */ void @@ -581,7 +585,7 @@ cost_hashjoin(Path *path, Path *outer_path, Path *inner_path, List *restrictlist, - Selectivity innerdispersion) + Selectivity innerbucketsize) { Cost startup_cost = 0; Cost run_cost = 0; @@ -607,22 +611,20 @@ cost_hashjoin(Path *path, /* * The number of tuple comparisons needed is the number of outer - * tuples times the typical hash bucket size. nodeHash.c tries for - * average bucket loading of NTUP_PER_BUCKET, but that goal will be - * reached only if data values are uniformly distributed among the - * buckets. To be conservative, we scale up the target bucket size by - * the number of inner rows times inner dispersion, giving an estimate - * of the typical number of duplicates of each value. We then charge - * one cpu_operator_cost per tuple comparison. + * tuples times the typical number of tuples in a hash bucket, + * which is the inner relation size times its bucketsize fraction. + * We charge one cpu_operator_cost per tuple comparison. */ run_cost += cpu_operator_cost * outer_path->parent->rows * - NTUP_PER_BUCKET * ceil(inner_path->parent->rows * innerdispersion); + ceil(inner_path->parent->rows * innerbucketsize); /* * Estimate the number of tuples that get through the hashing filter * as one per tuple in the two source relations. This could be a * drastic underestimate if there are many equal-keyed tuples in - * either relation, but we have no good way of estimating that... + * either relation, but we have no simple way of estimating that; + * and since this is only a second-order parameter, it's probably + * not worth expending a lot of effort on the estimate. */ ntuples = outer_path->parent->rows + inner_path->parent->rows; @@ -651,7 +653,7 @@ cost_hashjoin(Path *path, /* * Bias against putting larger relation on inside. We don't want an * absolute prohibition, though, since larger relation might have - * better dispersion --- and we can't trust the size estimates + * better bucketsize --- and we can't trust the size estimates * unreservedly, anyway. Instead, inflate the startup cost by the * square root of the size ratio. (Why square root? No real good * reason, but it seems reasonable...) @@ -663,6 +665,171 @@ cost_hashjoin(Path *path, path->total_cost = startup_cost + run_cost; } +/* + * Estimate hash bucketsize fraction (ie, number of entries in a bucket + * divided by total tuples in relation) if the specified Var is used + * as a hash key. + * + * This statistic is used by cost_hashjoin. We split out the calculation + * because it's useful to cache the result for re-use across multiple path + * cost calculations. + * + * XXX This is really pretty bogus since we're effectively assuming that the + * distribution of hash keys will be the same after applying restriction + * clauses as it was in the underlying relation. However, we are not nearly + * smart enough to figure out how the restrict clauses might change the + * distribution, so this will have to do for now. + * + * The executor tries for average bucket loading of NTUP_PER_BUCKET by setting + * number of buckets equal to ntuples / NTUP_PER_BUCKET, which would yield + * a bucketsize fraction of NTUP_PER_BUCKET / ntuples. But that goal will + * be reached only if the data values are uniformly distributed among the + * buckets, which requires (a) at least ntuples / NTUP_PER_BUCKET distinct + * data values, and (b) a not-too-skewed data distribution. Otherwise the + * buckets will be nonuniformly occupied. If the other relation in the join + * has a similar distribution, the most-loaded buckets are exactly those + * that will be probed most often. Therefore, the "average" bucket size for + * costing purposes should really be taken as something close to the "worst + * case" bucket size. We try to estimate this by first scaling up if there + * are too few distinct data values, and then scaling up again by the + * ratio of the most common value's frequency to the average frequency. + * + * If no statistics are available, use a default estimate of 0.1. This will + * discourage use of a hash rather strongly if the inner relation is large, + * which is what we want. We do not want to hash unless we know that the + * inner rel is well-dispersed (or the alternatives seem much worse). + */ +Selectivity +estimate_hash_bucketsize(Query *root, Var *var) +{ + Oid relid; + RelOptInfo *rel; + HeapTuple tuple; + Form_pg_statistic stats; + double estfract, + ndistinct, + needdistinct, + mcvfreq, + avgfreq; + float4 *numbers; + int nnumbers; + + /* + * Lookup info about var's relation and attribute; + * if none available, return default estimate. + */ + if (!IsA(var, Var)) + return 0.1; + + relid = getrelid(var->varno, root->rtable); + if (relid == InvalidOid) + return 0.1; + + rel = get_base_rel(root, var->varno); + + if (rel->tuples <= 0.0 || rel->rows <= 0.0) + return 0.1; /* ensure we can divide below */ + + tuple = SearchSysCache(STATRELATT, + ObjectIdGetDatum(relid), + Int16GetDatum(var->varattno), + 0, 0); + if (!HeapTupleIsValid(tuple)) + { + /* + * Perhaps the Var is a system attribute; if so, it will have no + * entry in pg_statistic, but we may be able to guess something + * about its distribution anyway. + */ + switch (var->varattno) + { + case ObjectIdAttributeNumber: + case SelfItemPointerAttributeNumber: + /* these are unique, so buckets should be well-distributed */ + return (double) NTUP_PER_BUCKET / rel->rows; + case TableOidAttributeNumber: + /* hashing this is a terrible idea... */ + return 1.0; + } + return 0.1; + } + stats = (Form_pg_statistic) GETSTRUCT(tuple); + + /* + * Obtain number of distinct data values in raw relation. + */ + ndistinct = stats->stadistinct; + if (ndistinct < 0.0) + ndistinct = -ndistinct * rel->tuples; + + /* + * Adjust ndistinct to account for restriction clauses. Observe we are + * assuming that the data distribution is affected uniformly by the + * restriction clauses! + * + * XXX Possibly better way, but much more expensive: multiply by + * selectivity of rel's restriction clauses that mention the target Var. + */ + ndistinct *= rel->rows / rel->tuples; + + /* + * Discourage use of hash join if there seem not to be very many distinct + * data values. The threshold here is somewhat arbitrary, as is the + * fraction used to "discourage" the choice. + */ + if (ndistinct < 50.0) + { + ReleaseSysCache(tuple); + return 0.5; + } + + /* + * Form initial estimate of bucketsize fraction. Here we use rel->rows, + * ie the number of rows after applying restriction clauses, because + * that's what the fraction will eventually be multiplied by in + * cost_heapjoin. + */ + estfract = (double) NTUP_PER_BUCKET / rel->rows; + + /* + * Adjust estimated bucketsize if too few distinct values to fill + * all the buckets. + */ + needdistinct = rel->rows / (double) NTUP_PER_BUCKET; + if (ndistinct < needdistinct) + estfract *= needdistinct / ndistinct; + + /* + * Look up the frequency of the most common value, if available. + */ + mcvfreq = 0.0; + + if (get_attstatsslot(tuple, var->vartype, var->vartypmod, + STATISTIC_KIND_MCV, InvalidOid, + NULL, NULL, &numbers, &nnumbers)) + { + /* + * The first MCV stat is for the most common value. + */ + if (nnumbers > 0) + mcvfreq = numbers[0]; + free_attstatsslot(var->vartype, NULL, 0, + numbers, nnumbers); + } + + /* + * Adjust estimated bucketsize upward to account for skewed distribution. + */ + avgfreq = (1.0 - stats->stanullfrac) / ndistinct; + + if (avgfreq > 0.0 && mcvfreq > avgfreq) + estfract *= mcvfreq / avgfreq; + + ReleaseSysCache(tuple); + + return (Selectivity) estfract; +} + /* * cost_qual_eval diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index d41336ddcee..cd7cabd41de 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -8,15 +8,15 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.63 2001/04/15 00:48:17 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.64 2001/05/07 00:43:20 tgl Exp $ * *------------------------------------------------------------------------- */ +#include "postgres.h" + #include <sys/types.h> #include <math.h> -#include "postgres.h" - #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/pathnode.h" @@ -45,7 +45,6 @@ static void hash_inner_and_outer(Query *root, RelOptInfo *joinrel, List *restrictlist, JoinType jointype); static Path *best_innerjoin(List *join_paths, List *outer_relid, JoinType jointype); -static Selectivity estimate_dispersion(Query *root, Var *var); static List *select_mergejoin_clauses(RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, @@ -722,7 +721,7 @@ hash_inner_and_outer(Query *root, Expr *clause; Var *left, *right; - Selectivity innerdispersion; + Selectivity innerbucketsize; List *hashclauses; if (restrictinfo->hashjoinoperator == InvalidOid) @@ -742,34 +741,34 @@ hash_inner_and_outer(Query *root, /* * Check if clause is usable with these sub-rels, find inner side, - * estimate dispersion of inner var for costing purposes. + * estimate bucketsize of inner var for costing purposes. * * Since we tend to visit the same clauses over and over when - * planning a large query, we cache the dispersion estimates in + * planning a large query, we cache the bucketsize estimates in * the RestrictInfo node to avoid repeated lookups of statistics. */ if (intMember(left->varno, outerrelids) && intMember(right->varno, innerrelids)) { /* righthand side is inner */ - innerdispersion = restrictinfo->right_dispersion; - if (innerdispersion < 0) + innerbucketsize = restrictinfo->right_bucketsize; + if (innerbucketsize < 0) { /* not cached yet */ - innerdispersion = estimate_dispersion(root, right); - restrictinfo->right_dispersion = innerdispersion; + innerbucketsize = estimate_hash_bucketsize(root, right); + restrictinfo->right_bucketsize = innerbucketsize; } } else if (intMember(left->varno, innerrelids) && intMember(right->varno, outerrelids)) { /* lefthand side is inner */ - innerdispersion = restrictinfo->left_dispersion; - if (innerdispersion < 0) + innerbucketsize = restrictinfo->left_bucketsize; + if (innerbucketsize < 0) { /* not cached yet */ - innerdispersion = estimate_dispersion(root, left); - restrictinfo->left_dispersion = innerdispersion; + innerbucketsize = estimate_hash_bucketsize(root, left); + restrictinfo->left_bucketsize = innerbucketsize; } } else @@ -790,7 +789,7 @@ hash_inner_and_outer(Query *root, innerrel->cheapest_total_path, restrictlist, hashclauses, - innerdispersion)); + innerbucketsize)); if (outerrel->cheapest_startup_path != outerrel->cheapest_total_path) add_path(joinrel, (Path *) create_hashjoin_path(joinrel, @@ -799,7 +798,7 @@ hash_inner_and_outer(Query *root, innerrel->cheapest_total_path, restrictlist, hashclauses, - innerdispersion)); + innerbucketsize)); } } @@ -867,31 +866,6 @@ best_innerjoin(List *join_paths, Relids outer_relids, JoinType jointype) } /* - * Estimate dispersion of the specified Var - * - * We use a default of 0.1 if we can't figure out anything better. - * This will typically discourage use of a hash rather strongly, - * if the inner relation is large. We do not want to hash unless - * we know that the inner rel is well-dispersed (or the alternatives - * seem much worse). - */ -static Selectivity -estimate_dispersion(Query *root, Var *var) -{ - Oid relid; - - if (!IsA(var, Var)) - return 0.1; - - relid = getrelid(var->varno, root->rtable); - - if (relid == InvalidOid) - return 0.1; - - return (Selectivity) get_attdispersion(relid, var->varattno, 0.1); -} - -/* * select_mergejoin_clauses * Select mergejoin clauses that are usable for a particular join. * Returns a list of RestrictInfo nodes for those clauses. diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 8c3b00289d3..2d264c46881 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,14 +10,14 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.104 2001/03/22 03:59:36 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.105 2001/05/07 00:43:20 tgl Exp $ * *------------------------------------------------------------------------- */ -#include <sys/types.h> - #include "postgres.h" +#include <sys/types.h> + #include "catalog/pg_index.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" @@ -1484,9 +1484,9 @@ make_sort_from_pathkeys(List *tlist, Plan *lefttree, List *pathkeys) */ if (resdom->reskey == 0) { - /* OK, mark it as a sort key and set the sort operator regproc */ + /* OK, mark it as a sort key and set the sort operator */ resdom->reskey = ++numsortkeys; - resdom->reskeyop = get_opcode(pathkey->sortop); + resdom->reskeyop = pathkey->sortop; } } diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 7c3e15a8f88..5d67e02dacb 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -8,13 +8,14 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.59 2001/04/16 19:44:10 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.60 2001/05/07 00:43:21 tgl Exp $ * *------------------------------------------------------------------------- */ +#include "postgres.h" + #include <sys/types.h> -#include "postgres.h" #include "catalog/pg_operator.h" #include "catalog/pg_type.h" #include "nodes/makefuncs.h" @@ -348,8 +349,8 @@ distribute_qual_to_rels(Query *root, Node *clause, restrictinfo->left_pathkey = NIL; /* not computable yet */ restrictinfo->right_pathkey = NIL; restrictinfo->hashjoinoperator = InvalidOid; - restrictinfo->left_dispersion = -1; /* not computed until needed */ - restrictinfo->right_dispersion = -1; + restrictinfo->left_bucketsize = -1; /* not computed until needed */ + restrictinfo->right_bucketsize = -1; /* * Retrieve all relids and vars contained within the clause. diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index b2ab4600209..0aba4808c16 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.105 2001/04/30 19:24:47 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.106 2001/05/07 00:43:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1367,7 +1367,7 @@ make_groupplan(List *group_tlist, { /* OK, insert the ordering info needed by the executor. */ resdom->reskey = ++keyno; - resdom->reskeyop = get_opcode(grpcl->sortop); + resdom->reskeyop = grpcl->sortop; } } @@ -1412,7 +1412,7 @@ make_sortplan(List *tlist, Plan *plannode, List *sortcls) { /* OK, insert the ordering info needed by the executor. */ resdom->reskey = ++keyno; - resdom->reskeyop = get_opcode(sortcl->sortop); + resdom->reskeyop = sortcl->sortop; } } diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 0b173466cf9..ede4159d970 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -14,7 +14,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.62 2001/03/27 18:02:19 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.63 2001/05/07 00:43:22 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -682,8 +682,8 @@ adjust_inherited_attrs_mutator(Node *node, newinfo->eval_cost = -1; /* reset this too */ newinfo->left_pathkey = NIL; /* and these */ newinfo->right_pathkey = NIL; - newinfo->left_dispersion = -1; - newinfo->right_dispersion = -1; + newinfo->left_bucketsize = -1; + newinfo->right_bucketsize = -1; return (Node *) newinfo; } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index cfba3ee395f..407c132b4f7 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,14 +8,14 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.71 2001/03/22 03:59:39 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.72 2001/05/07 00:43:22 tgl Exp $ * *------------------------------------------------------------------------- */ -#include <math.h> - #include "postgres.h" +#include <math.h> + #include "nodes/plannodes.h" #include "optimizer/cost.h" #include "optimizer/pathnode.h" @@ -559,7 +559,7 @@ create_mergejoin_path(RelOptInfo *joinrel, * 'restrict_clauses' are the RestrictInfo nodes to apply at the join * 'hashclauses' is a list of the hash join clause (always a 1-element list) * (this should be a subset of the restrict_clauses list) - * 'innerdispersion' is an estimate of the dispersion of the inner hash key + * 'innerbucketsize' is an estimate of the bucketsize of the inner hash key * */ HashPath * @@ -569,7 +569,7 @@ create_hashjoin_path(RelOptInfo *joinrel, Path *inner_path, List *restrict_clauses, List *hashclauses, - Selectivity innerdispersion) + Selectivity innerbucketsize) { HashPath *pathnode = makeNode(HashPath); @@ -587,7 +587,7 @@ create_hashjoin_path(RelOptInfo *joinrel, outer_path, inner_path, restrict_clauses, - innerdispersion); + innerbucketsize); return pathnode; } diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 4f711df203c..ee3523553e8 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -9,11 +9,10 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.64 2001/03/22 03:59:40 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.65 2001/05/07 00:43:22 tgl Exp $ * *------------------------------------------------------------------------- */ - #include "postgres.h" #include <math.h> diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 4687a559962..76cc095bc4e 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Header: /cvsroot/pgsql/src/backend/parser/analyze.c,v 1.183 2001/03/22 06:16:15 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/analyze.c,v 1.184 2001/05/07 00:43:22 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -2660,7 +2660,7 @@ transformForUpdate(Query *qry, List *forUpdate) /* just the named tables */ foreach(l, forUpdate) { - char *relname = lfirst(l); + char *relname = strVal(lfirst(l)); i = 0; foreach(rt, qry->rtable) diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index bed0ce239a4..40c379aca51 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.222 2001/05/01 01:36:10 thomas Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.223 2001/05/07 00:43:23 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -104,7 +104,6 @@ static void doNegateFloat(Value *v); char *str; bool boolean; JoinType jtype; - InhOption inhOpt; List *list; Node *node; Value *value; @@ -130,6 +129,7 @@ static void doNegateFloat(Value *v); %type <node> stmt, AlterGroupStmt, AlterSchemaStmt, AlterTableStmt, AlterUserStmt, + AnalyzeStmt, ClosePortalStmt, ClusterStmt, CommentStmt, ConstraintsSetStmt, CopyStmt, CreateAsStmt, CreateGroupStmt, CreatePLangStmt, CreateSchemaStmt, CreateSeqStmt, CreateStmt, CreateTrigStmt, @@ -147,7 +147,7 @@ static void doNegateFloat(Value *v); %type <node> select_no_parens, select_with_parens, select_clause, simple_select -%type <node> alter_column_action +%type <node> alter_column_default %type <ival> drop_behavior %type <list> createdb_opt_list, createdb_opt_item @@ -185,7 +185,7 @@ static void doNegateFloat(Value *v); OptTableElementList, OptInherit, definition, opt_distinct, opt_with, func_args, func_args_list, func_as, oper_argtypes, RuleActionList, RuleActionMulti, - opt_column_list, columnList, opt_va_list, va_list, + opt_column_list, columnList, opt_name_list, sort_clause, sortby_list, index_params, index_list, name_list, from_clause, from_list, opt_array_bounds, expr_list, attrs, target_list, update_target_list, @@ -210,9 +210,7 @@ static void doNegateFloat(Value *v); %type <node> substr_from, substr_for %type <boolean> opt_binary, opt_using, opt_instead, opt_cursor -%type <boolean> opt_with_copy, index_opt_unique, opt_verbose, opt_analyze - -%type <inhOpt> opt_inh_star, opt_only +%type <boolean> opt_with_copy, index_opt_unique, opt_verbose, analyze_keyword %type <ival> copy_dirn, direction, reindex_type, drop_type, opt_column, event, comment_type, comment_cl, @@ -350,7 +348,8 @@ static void doNegateFloat(Value *v); NEW, NOCREATEDB, NOCREATEUSER, NONE, NOTHING, NOTIFY, NOTNULL, OFFSET, OIDS, OPERATOR, OWNER, PASSWORD, PROCEDURAL, REINDEX, RENAME, RESET, RETURNS, ROW, RULE, - SEQUENCE, SERIAL, SETOF, SHARE, SHOW, START, STATEMENT, STDIN, STDOUT, SYSID, + SEQUENCE, SERIAL, SETOF, SHARE, SHOW, START, STATEMENT, + STATISTICS, STDIN, STDOUT, SYSID, TEMP, TEMPLATE, TOAST, TRUNCATE, TRUSTED, UNLISTEN, UNTIL, VACUUM, VALID, VERBOSE, VERSION @@ -470,6 +469,7 @@ stmt : AlterSchemaStmt | CreatedbStmt | DropdbStmt | VacuumStmt + | AnalyzeStmt | VariableSetStmt | VariableShowStmt | VariableResetStmt @@ -938,57 +938,68 @@ CheckPointStmt: CHECKPOINT *****************************************************************************/ AlterTableStmt: -/* ALTER TABLE <name> ADD [COLUMN] <coldef> */ - ALTER TABLE relation_name opt_inh_star ADD opt_column columnDef +/* ALTER TABLE <relation> ADD [COLUMN] <coldef> */ + ALTER TABLE relation_expr ADD opt_column columnDef { AlterTableStmt *n = makeNode(AlterTableStmt); n->subtype = 'A'; - n->relname = $3; - n->inhOpt = $4; - n->def = $7; + n->relname = $3->relname; + n->inhOpt = $3->inhOpt; + n->def = $6; $$ = (Node *)n; } -/* ALTER TABLE <name> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP DEFAULT} */ - | ALTER TABLE relation_name opt_inh_star ALTER opt_column ColId alter_column_action +/* ALTER TABLE <relation> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP DEFAULT} */ + | ALTER TABLE relation_expr ALTER opt_column ColId alter_column_default { AlterTableStmt *n = makeNode(AlterTableStmt); n->subtype = 'T'; - n->relname = $3; - n->inhOpt = $4; - n->name = $7; - n->def = $8; + n->relname = $3->relname; + n->inhOpt = $3->inhOpt; + n->name = $6; + n->def = $7; $$ = (Node *)n; } -/* ALTER TABLE <name> DROP [COLUMN] <name> {RESTRICT|CASCADE} */ - | ALTER TABLE relation_name opt_inh_star DROP opt_column ColId drop_behavior +/* ALTER TABLE <relation> ALTER [COLUMN] <colname> SET STATISTICS <Iconst> */ + | ALTER TABLE relation_expr ALTER opt_column ColId SET STATISTICS Iconst + { + AlterTableStmt *n = makeNode(AlterTableStmt); + n->subtype = 'S'; + n->relname = $3->relname; + n->inhOpt = $3->inhOpt; + n->name = $6; + n->def = (Node *) makeInteger($9); + $$ = (Node *)n; + } +/* ALTER TABLE <relation> DROP [COLUMN] <colname> {RESTRICT|CASCADE} */ + | ALTER TABLE relation_expr DROP opt_column ColId drop_behavior { AlterTableStmt *n = makeNode(AlterTableStmt); n->subtype = 'D'; - n->relname = $3; - n->inhOpt = $4; - n->name = $7; - n->behavior = $8; + n->relname = $3->relname; + n->inhOpt = $3->inhOpt; + n->name = $6; + n->behavior = $7; $$ = (Node *)n; } -/* ALTER TABLE <name> ADD CONSTRAINT ... */ - | ALTER TABLE relation_name opt_inh_star ADD TableConstraint +/* ALTER TABLE <relation> ADD CONSTRAINT ... */ + | ALTER TABLE relation_expr ADD TableConstraint { AlterTableStmt *n = makeNode(AlterTableStmt); n->subtype = 'C'; - n->relname = $3; - n->inhOpt = $4; - n->def = $6; + n->relname = $3->relname; + n->inhOpt = $3->inhOpt; + n->def = $5; $$ = (Node *)n; } -/* ALTER TABLE <name> DROP CONSTRAINT <name> {RESTRICT|CASCADE} */ - | ALTER TABLE relation_name opt_inh_star DROP CONSTRAINT name drop_behavior +/* ALTER TABLE <relation> DROP CONSTRAINT <name> {RESTRICT|CASCADE} */ + | ALTER TABLE relation_expr DROP CONSTRAINT name drop_behavior { AlterTableStmt *n = makeNode(AlterTableStmt); n->subtype = 'X'; - n->relname = $3; - n->inhOpt = $4; - n->name = $7; - n->behavior = $8; + n->relname = $3->relname; + n->inhOpt = $3->inhOpt; + n->name = $6; + n->behavior = $7; $$ = (Node *)n; } /* ALTER TABLE <name> CREATE TOAST TABLE */ @@ -997,6 +1008,7 @@ AlterTableStmt: AlterTableStmt *n = makeNode(AlterTableStmt); n->subtype = 'E'; n->relname = $3; + n->inhOpt = INH_NO; $$ = (Node *)n; } /* ALTER TABLE <name> OWNER TO UserId */ @@ -1005,12 +1017,13 @@ AlterTableStmt: AlterTableStmt *n = makeNode(AlterTableStmt); n->subtype = 'U'; n->relname = $3; + n->inhOpt = INH_NO; n->name = $6; $$ = (Node *)n; } ; -alter_column_action: +alter_column_default: SET DEFAULT a_expr { /* Treat SET DEFAULT NULL the same as DROP DEFAULT */ @@ -1478,10 +1491,6 @@ key_reference: NO ACTION { $$ = FKCONSTR_ON_KEY_NOACTION; } | SET DEFAULT { $$ = FKCONSTR_ON_KEY_SETDEFAULT; } ; -opt_only: ONLY { $$ = INH_NO; } - | /*EMPTY*/ { $$ = INH_DEFAULT; } - ; - OptInherit: INHERITS '(' relation_name_list ')' { $$ = $3; } | /*EMPTY*/ { $$ = NIL; } ; @@ -2598,14 +2607,13 @@ opt_force: FORCE { $$ = TRUE; } * *****************************************************************************/ -RenameStmt: ALTER TABLE relation_name opt_inh_star - RENAME opt_column opt_name TO name +RenameStmt: ALTER TABLE relation_expr RENAME opt_column opt_name TO name { RenameStmt *n = makeNode(RenameStmt); - n->relname = $3; - n->inhOpt = $4; - n->column = $7; - n->newname = $9; + n->relname = $3->relname; + n->inhOpt = $3->inhOpt; + n->column = $6; + n->newname = $8; $$ = (Node *)n; } ; @@ -2994,49 +3002,71 @@ ClusterStmt: CLUSTER index_name ON relation_name * * QUERY: * vacuum + * analyze * *****************************************************************************/ -VacuumStmt: VACUUM opt_verbose opt_analyze +VacuumStmt: VACUUM opt_verbose { VacuumStmt *n = makeNode(VacuumStmt); + n->vacuum = true; + n->analyze = false; n->verbose = $2; - n->analyze = $3; n->vacrel = NULL; - n->va_spec = NIL; + n->va_cols = NIL; $$ = (Node *)n; } - | VACUUM opt_verbose opt_analyze relation_name opt_va_list + | VACUUM opt_verbose relation_name { VacuumStmt *n = makeNode(VacuumStmt); + n->vacuum = true; + n->analyze = false; n->verbose = $2; - n->analyze = $3; - n->vacrel = $4; - n->va_spec = $5; - if ( $5 != NIL && !$4 ) - elog(ERROR,"VACUUM syntax error at or near \"(\"" - "\n\tRelation name must be specified"); + n->vacrel = $3; + n->va_cols = NIL; + $$ = (Node *)n; + } + | VACUUM opt_verbose AnalyzeStmt + { + VacuumStmt *n = (VacuumStmt *) $3; + n->vacuum = true; + n->verbose |= $2; $$ = (Node *)n; } ; -opt_verbose: VERBOSE { $$ = TRUE; } - | /*EMPTY*/ { $$ = FALSE; } +AnalyzeStmt: analyze_keyword opt_verbose + { + VacuumStmt *n = makeNode(VacuumStmt); + n->vacuum = false; + n->analyze = true; + n->verbose = $2; + n->vacrel = NULL; + n->va_cols = NIL; + $$ = (Node *)n; + } + | analyze_keyword opt_verbose relation_name opt_name_list + { + VacuumStmt *n = makeNode(VacuumStmt); + n->vacuum = false; + n->analyze = true; + n->verbose = $2; + n->vacrel = $3; + n->va_cols = $4; + $$ = (Node *)n; + } ; -opt_analyze: ANALYZE { $$ = TRUE; } +analyze_keyword: ANALYZE { $$ = TRUE; } | ANALYSE /* British */ { $$ = TRUE; } - | /*EMPTY*/ { $$ = FALSE; } ; -opt_va_list: '(' va_list ')' { $$ = $2; } - | /*EMPTY*/ { $$ = NIL; } +opt_verbose: VERBOSE { $$ = TRUE; } + | /*EMPTY*/ { $$ = FALSE; } ; -va_list: name - { $$ = makeList1($1); } - | va_list ',' name - { $$ = lappend($1, $3); } +opt_name_list: '(' name_list ')' { $$ = $2; } + | /*EMPTY*/ { $$ = NIL; } ; @@ -3160,12 +3190,12 @@ columnElem: ColId opt_indirection * *****************************************************************************/ -DeleteStmt: DELETE FROM opt_only relation_name where_clause +DeleteStmt: DELETE FROM relation_expr where_clause { DeleteStmt *n = makeNode(DeleteStmt); - n->inhOpt = $3; - n->relname = $4; - n->whereClause = $5; + n->relname = $3->relname; + n->inhOpt = $3->inhOpt; + n->whereClause = $4; $$ = (Node *)n; } ; @@ -3202,17 +3232,17 @@ opt_lmode: SHARE { $$ = TRUE; } * *****************************************************************************/ -UpdateStmt: UPDATE opt_only relation_name +UpdateStmt: UPDATE relation_expr SET update_target_list from_clause where_clause { UpdateStmt *n = makeNode(UpdateStmt); - n->inhOpt = $2; - n->relname = $3; - n->targetList = $5; - n->fromClause = $6; - n->whereClause = $7; + n->relname = $2->relname; + n->inhOpt = $2->inhOpt; + n->targetList = $4; + n->fromClause = $5; + n->whereClause = $6; $$ = (Node *)n; } ; @@ -3545,10 +3575,6 @@ select_offset_value: Iconst * ...however, recursive addattr and rename supported. make special * cases for these. */ -opt_inh_star: '*' { $$ = INH_YES; } - | /*EMPTY*/ { $$ = INH_DEFAULT; } - ; - relation_name_list: name_list; name_list: name @@ -3576,7 +3602,7 @@ opt_for_update_clause: for_update_clause { $$ = $1; } | /* EMPTY */ { $$ = NULL; } ; -update_list: OF va_list { $$ = $2; } +update_list: OF name_list { $$ = $2; } | /* EMPTY */ { $$ = makeList1(NULL); } ; @@ -5525,6 +5551,7 @@ TokenId: ABSOLUTE { $$ = "absolute"; } | SHARE { $$ = "share"; } | START { $$ = "start"; } | STATEMENT { $$ = "statement"; } + | STATISTICS { $$ = "statistics"; } | STDIN { $$ = "stdin"; } | STDOUT { $$ = "stdout"; } | SYSID { $$ = "sysid"; } diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c index 402dbfd28ca..8ab19f86ae8 100644 --- a/src/backend/parser/keywords.c +++ b/src/backend/parser/keywords.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.90 2001/03/22 03:59:40 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.91 2001/05/07 00:43:23 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -238,6 +238,7 @@ static ScanKeyword ScanKeywords[] = { {"some", SOME}, {"start", START}, {"statement", STATEMENT}, + {"statistics", STATISTICS}, {"stdin", STDIN}, {"stdout", STDOUT}, {"substring", SUBSTRING}, diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c index f5324cb3735..e1d49842fd2 100644 --- a/src/backend/parser/parse_relation.c +++ b/src/backend/parser/parse_relation.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/parser/parse_relation.c,v 1.54 2001/04/18 17:04:24 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/parse_relation.c,v 1.55 2001/05/07 00:43:23 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -75,7 +75,7 @@ static struct } }; -#define SPECIALS ((int) (sizeof(special_attr)/sizeof(special_attr[0]))) +#define SPECIALS ((int) lengthof(special_attr)) /* @@ -670,7 +670,7 @@ isForUpdate(ParseState *pstate, char *relname) foreach(l, pstate->p_forUpdate) { - char *rname = lfirst(l); + char *rname = strVal(lfirst(l)); if (strcmp(relname, rname) == 0) return true; @@ -1020,20 +1020,6 @@ attnameIsSet(Relation rd, char *name) #endif -#ifdef NOT_USED -/* - * This should only be used if the relation is already - * heap_open()'ed. Use the cache version - * for access to non-opened relations. - */ -int -attnumAttNelems(Relation rd, int attid) -{ - return rd->rd_att->attrs[attid - 1]->attnelems; -} - -#endif - /* given attribute id, return type of that attribute */ /* * This should only be used if the relation is already diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index ae6cd20a5db..b616f7e68ef 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/tcop/utility.c,v 1.109 2001/03/22 06:16:17 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/tcop/utility.c,v 1.110 2001/05/07 00:43:23 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -427,13 +427,19 @@ ProcessUtility(Node *parsetree, interpretInhOption(stmt->inhOpt), (ColumnDef *) stmt->def); break; - case 'T': /* ALTER COLUMN */ - AlterTableAlterColumn(stmt->relname, + case 'T': /* ALTER COLUMN DEFAULT */ + AlterTableAlterColumnDefault(stmt->relname, interpretInhOption(stmt->inhOpt), - stmt->name, - stmt->def); + stmt->name, + stmt->def); break; - case 'D': /* ALTER DROP */ + case 'S': /* ALTER COLUMN STATISTICS */ + AlterTableAlterColumnStatistics(stmt->relname, + interpretInhOption(stmt->inhOpt), + stmt->name, + stmt->def); + break; + case 'D': /* DROP COLUMN */ AlterTableDropColumn(stmt->relname, interpretInhOption(stmt->inhOpt), stmt->name, @@ -703,12 +709,13 @@ ProcessUtility(Node *parsetree, break; case T_VacuumStmt: - set_ps_display(commandTag = "VACUUM"); + if (((VacuumStmt *) parsetree)->vacuum) + commandTag = "VACUUM"; + else + commandTag = "ANALYZE"; + set_ps_display(commandTag); - vacuum(((VacuumStmt *) parsetree)->vacrel, - ((VacuumStmt *) parsetree)->verbose, - ((VacuumStmt *) parsetree)->analyze, - ((VacuumStmt *) parsetree)->va_spec); + vacuum((VacuumStmt *) parsetree); break; case T_ExplainStmt: diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 1fe0afb0a35..41ba82db7b5 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.87 2001/03/23 04:49:54 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.88 2001/05/07 00:43:23 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -57,9 +57,6 @@ /* default selectivity estimate for pattern-match operators such as LIKE */ #define DEFAULT_MATCH_SEL 0.01 -/* "fudge factor" for estimating frequency of not-most-common values */ -#define NOT_MOST_COMMON_RATIO 0.1 - static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, Datum lobound, Datum hibound, Oid boundstypid, double *scaledlobound, double *scaledhibound); @@ -75,17 +72,9 @@ static double convert_one_string_to_scalar(unsigned char *value, static unsigned char *convert_string_datum(Datum value, Oid typid); static double convert_timevalue_to_scalar(Datum value, Oid typid); static void getattproperties(Oid relid, AttrNumber attnum, - Oid *typid, - int *typlen, - bool *typbyval, - int32 *typmod); -static bool getattstatistics(Oid relid, AttrNumber attnum, - Oid typid, int32 typmod, - double *nullfrac, - double *commonfrac, - Datum *commonval, - Datum *loval, - Datum *hival); + Oid *typid, int32 *typmod); +static double get_att_numdistinct(Oid relid, AttrNumber attnum, Oid typid, + Form_pg_statistic stats); static Selectivity prefix_selectivity(char *prefix, Oid relid, AttrNumber attno, @@ -115,134 +104,173 @@ eqsel(PG_FUNCTION_ARGS) AttrNumber attno = PG_GETARG_INT16(2); Datum value = PG_GETARG_DATUM(3); int32 flag = PG_GETARG_INT32(4); - float8 result; - - if (NONVALUE(attno) || NONVALUE(relid)) - result = DEFAULT_EQ_SEL; - else + Oid typid; + int32 typmod; + HeapTuple statsTuple; + Datum *values; + int nvalues; + float4 *numbers; + int nnumbers; + double selec; + + if (NONVALUE(relid) || NONVALUE(attno)) + PG_RETURN_FLOAT8(DEFAULT_EQ_SEL); + + /* get info about the attribute */ + getattproperties(relid, attno, &typid, &typmod); + + /* get stats for the attribute, if available */ + statsTuple = SearchSysCache(STATRELATT, + ObjectIdGetDatum(relid), + Int16GetDatum(attno), + 0, 0); + if (HeapTupleIsValid(statsTuple)) { - Oid typid; - int typlen; - bool typbyval; - int32 typmod; - double nullfrac; - double commonfrac; - Datum commonval; - double selec; - - /* get info about the attribute */ - getattproperties(relid, attno, - &typid, &typlen, &typbyval, &typmod); - - /* get stats for the attribute, if available */ - if (getattstatistics(relid, attno, typid, typmod, - &nullfrac, &commonfrac, &commonval, - NULL, NULL)) - { - if (flag & SEL_CONSTANT) - { + Form_pg_statistic stats; - /* - * Is the constant "=" to the column's most common value? - * (Although the operator may not really be "=", we will - * assume that seeing whether it returns TRUE for the most - * common value is useful information. If you don't like - * it, maybe you shouldn't be using eqsel for your - * operator...) - */ - RegProcedure eqproc = get_opcode(opid); - bool mostcommon; + stats = (Form_pg_statistic) GETSTRUCT(statsTuple); - if (eqproc == (RegProcedure) NULL) - elog(ERROR, "eqsel: no procedure for operator %u", - opid); + if (flag & SEL_CONSTANT) + { + bool match = false; + int i; - /* be careful to apply operator right way 'round */ - if (flag & SEL_RIGHT) - mostcommon = DatumGetBool(OidFunctionCall2(eqproc, - commonval, - value)); - else - mostcommon = DatumGetBool(OidFunctionCall2(eqproc, - value, - commonval)); + /* + * Is the constant "=" to any of the column's most common + * values? (Although the given operator may not really be + * "=", we will assume that seeing whether it returns TRUE + * is an appropriate test. If you don't like this, maybe you + * shouldn't be using eqsel for your operator...) + */ + if (get_attstatsslot(statsTuple, typid, typmod, + STATISTIC_KIND_MCV, InvalidOid, + &values, &nvalues, + &numbers, &nnumbers)) + { + FmgrInfo eqproc; - if (mostcommon) - { + fmgr_info(get_opcode(opid), &eqproc); - /* - * Constant is "=" to the most common value. We know - * selectivity exactly (or as exactly as VACUUM could - * calculate it, anyway). - */ - selec = commonfrac; - } - else + for (i = 0; i < nvalues; i++) { - - /* - * Comparison is against a constant that is neither - * the most common value nor null. Its selectivity - * cannot be more than this: - */ - selec = 1.0 - commonfrac - nullfrac; - if (selec > commonfrac) - selec = commonfrac; - - /* - * and in fact it's probably less, so we should apply - * a fudge factor. The only case where we don't is - * for a boolean column, where indeed we have - * estimated the less-common value's frequency - * exactly! - */ - if (typid != BOOLOID) - selec *= NOT_MOST_COMMON_RATIO; + /* be careful to apply operator right way 'round */ + if (flag & SEL_RIGHT) + match = DatumGetBool(FunctionCall2(&eqproc, + values[i], + value)); + else + match = DatumGetBool(FunctionCall2(&eqproc, + value, + values[i])); + if (match) + break; } } else { + /* no most-common-value info available */ + values = NULL; + numbers = NULL; + i = nvalues = nnumbers = 0; + } + if (match) + { + /* + * Constant is "=" to this common value. We know + * selectivity exactly (or as exactly as VACUUM + * could calculate it, anyway). + */ + selec = numbers[i]; + } + else + { /* - * Search is for a value that we do not know a priori, but - * we will assume it is not NULL. Selectivity cannot be - * more than this: + * Comparison is against a constant that is neither + * NULL nor any of the common values. Its selectivity + * cannot be more than this: */ - selec = 1.0 - nullfrac; - if (selec > commonfrac) - selec = commonfrac; + double sumcommon = 0.0; + double otherdistinct; + for (i = 0; i < nnumbers; i++) + sumcommon += numbers[i]; + selec = 1.0 - sumcommon - stats->stanullfrac; + /* + * and in fact it's probably a good deal less. + * We approximate that all the not-common values + * share this remaining fraction equally, so we + * divide by the number of other distinct values. + */ + otherdistinct = get_att_numdistinct(relid, attno, + typid, stats) + - nnumbers; + if (otherdistinct > 1) + selec /= otherdistinct; /* - * and in fact it's probably less, so apply a fudge - * factor. + * Another cross-check: selectivity shouldn't be + * estimated as more than the least common + * "most common value". */ - selec *= NOT_MOST_COMMON_RATIO; + if (nnumbers > 0 && selec > numbers[nnumbers-1]) + selec = numbers[nnumbers-1]; } - /* result should be in range, but make sure... */ - if (selec < 0.0) - selec = 0.0; - else if (selec > 1.0) - selec = 1.0; - - if (!typbyval) - pfree(DatumGetPointer(commonval)); + free_attstatsslot(typid, values, nvalues, numbers, nnumbers); } else { + double ndistinct; /* - * No VACUUM ANALYZE stats available, so make a guess using - * the dispersion stat (if we have that, which is unlikely for - * a normal attribute; but for a system attribute we may be - * able to estimate it). + * Search is for a value that we do not know a priori, but + * we will assume it is not NULL. Estimate the selectivity + * as non-null fraction divided by number of distinct values, + * so that we get a result averaged over all possible values + * whether common or uncommon. (Essentially, we are assuming + * that the not-yet-known comparison value is equally likely + * to be any of the possible values, regardless of their + * frequency in the table. Is that a good idea?) + */ + selec = 1.0 - stats->stanullfrac; + ndistinct = get_att_numdistinct(relid, attno, typid, stats); + if (ndistinct > 1) + selec /= ndistinct; + /* + * Cross-check: selectivity should never be + * estimated as more than the most common value's. */ - selec = get_attdispersion(relid, attno, 0.01); + if (get_attstatsslot(statsTuple, typid, typmod, + STATISTIC_KIND_MCV, InvalidOid, + NULL, NULL, + &numbers, &nnumbers)) + { + if (nnumbers > 0 && selec > numbers[0]) + selec = numbers[0]; + free_attstatsslot(typid, NULL, 0, numbers, nnumbers); + } } - result = (float8) selec; + ReleaseSysCache(statsTuple); } - PG_RETURN_FLOAT8(result); + else + { + /* + * No VACUUM ANALYZE stats available, so make a guess using + * estimated number of distinct values and assuming they are + * equally common. (The guess is unlikely to be very good, + * but we do know a few special cases.) + */ + selec = 1.0 / get_att_numdistinct(relid, attno, typid, NULL); + } + + /* result should be in range, but make sure... */ + if (selec < 0.0) + selec = 0.0; + else if (selec > 1.0) + selec = 1.0; + + PG_RETURN_FLOAT8((float8) selec); } /* @@ -301,117 +329,263 @@ scalarltsel(PG_FUNCTION_ARGS) AttrNumber attno = PG_GETARG_INT16(2); Datum value = PG_GETARG_DATUM(3); int32 flag = PG_GETARG_INT32(4); - float8 result; + bool isgt; + HeapTuple oprTuple; + HeapTuple statsTuple; + Form_pg_statistic stats; + Oid contype; + FmgrInfo opproc; + Oid typid; + int32 typmod; + Datum *values; + int nvalues; + float4 *numbers; + int nnumbers; + double mcv_selec, + hist_selec, + sumcommon; + double selec; + int i; + + if (NONVALUE(relid) || NONVALUE(attno)) + PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); + + /* Can't do anything useful if no constant to compare against, either */ + if (!(flag & SEL_CONSTANT)) + PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); - if (!(flag & SEL_CONSTANT) || NONVALUE(attno) || NONVALUE(relid)) - result = DEFAULT_INEQ_SEL; + /* + * Force the constant to be on the right to simplify later logic. + * This means that we may be dealing with either "<" or ">" cases. + */ + if (flag & SEL_RIGHT) + { + /* we have x < const */ + isgt = false; + } else { - HeapTuple oprtuple; - Oid ltype, - rtype, - contype; - Oid typid; - int typlen; - bool typbyval; - int32 typmod; - Datum hival, - loval; - double val, - high, - low, - numerator, - denominator; - - /* - * Get left and right datatypes of the operator so we know what - * type the constant is. - */ - oprtuple = SearchSysCache(OPEROID, - ObjectIdGetDatum(opid), - 0, 0, 0); - if (!HeapTupleIsValid(oprtuple)) - elog(ERROR, "scalarltsel: no tuple for operator %u", opid); - ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft; - rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright; - contype = (flag & SEL_RIGHT) ? rtype : ltype; - ReleaseSysCache(oprtuple); - - /* Now get info and stats about the attribute */ - getattproperties(relid, attno, - &typid, &typlen, &typbyval, &typmod); - - if (!getattstatistics(relid, attno, typid, typmod, - NULL, NULL, NULL, - &loval, &hival)) + /* we have const < x, commute to make x > const */ + opid = get_commutator(opid); + if (!opid) { - /* no stats available, so default result */ + /* Use default selectivity (should we raise an error instead?) */ PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); } + isgt = true; + } - /* Convert the values to a uniform comparison scale. */ - if (!convert_to_scalar(value, contype, &val, - loval, hival, typid, - &low, &high)) - { + /* + * The constant might not be the same datatype as the column; + * look at the operator's input types to find out what it is. + * Also set up to be able to call the operator's execution proc. + */ + oprTuple = SearchSysCache(OPEROID, + ObjectIdGetDatum(opid), + 0, 0, 0); + if (!HeapTupleIsValid(oprTuple)) + elog(ERROR, "scalarltsel: no tuple for operator %u", opid); + contype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprright; + fmgr_info(((Form_pg_operator) GETSTRUCT(oprTuple))->oprcode, &opproc); + ReleaseSysCache(oprTuple); + + /* Now get info and stats about the attribute */ + getattproperties(relid, attno, &typid, &typmod); + + statsTuple = SearchSysCache(STATRELATT, + ObjectIdGetDatum(relid), + Int16GetDatum(attno), + 0, 0); + if (!HeapTupleIsValid(statsTuple)) + { + /* no stats available, so default result */ + PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); + } + stats = (Form_pg_statistic) GETSTRUCT(statsTuple); - /* - * Ideally we'd produce an error here, on the grounds that the - * given operator shouldn't have scalarltsel registered as its - * selectivity func unless we can deal with its operand types. - * But currently, all manner of stuff is invoking scalarltsel, - * so give a default estimate until that can be fixed. - */ - if (!typbyval) - { - pfree(DatumGetPointer(hival)); - pfree(DatumGetPointer(loval)); - } - PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); - } + /* + * If we have most-common-values info, add up the fractions of the + * MCV entries that satisfy MCV OP CONST. These fractions contribute + * directly to the result selectivity. Also add up the total fraction + * represented by MCV entries. + */ + mcv_selec = 0.0; + sumcommon = 0.0; - /* release temp storage if needed */ - if (!typbyval) + if (get_attstatsslot(statsTuple, typid, typmod, + STATISTIC_KIND_MCV, InvalidOid, + &values, &nvalues, + &numbers, &nnumbers)) + { + for (i = 0; i < nvalues; i++) { - pfree(DatumGetPointer(hival)); - pfree(DatumGetPointer(loval)); + if (DatumGetBool(FunctionCall2(&opproc, + values[i], + value))) + mcv_selec += numbers[i]; + sumcommon += numbers[i]; } + free_attstatsslot(typid, values, nvalues, numbers, nnumbers); + } + + /* + * If there is a histogram, determine which bin the constant falls in, + * and compute the resulting contribution to selectivity. + * + * Someday, VACUUM might store more than one histogram per rel/att, + * corresponding to more than one possible sort ordering defined for + * the column type. However, to make that work we will need to figure + * out which staop to search for --- it's not necessarily the one we + * have at hand! (For example, we might have a '<=' operator rather + * than the '<' operator that will appear in staop.) For now, assume + * that whatever appears in pg_statistic is sorted the same way our + * operator sorts. + */ + hist_selec = 0.0; - if (high <= low) + if (get_attstatsslot(statsTuple, typid, typmod, + STATISTIC_KIND_HISTOGRAM, InvalidOid, + &values, &nvalues, + NULL, NULL)) + { + if (nvalues > 1) { + double histfrac; + bool ltcmp; + + ltcmp = DatumGetBool(FunctionCall2(&opproc, + values[0], + value)); + if (isgt) + ltcmp = !ltcmp; + if (!ltcmp) + { + /* Constant is below lower histogram boundary. */ + histfrac = 0.0; + } + else + { + /* + * Scan to find proper location. This could be made faster + * by using a binary-search method, but it's probably not + * worth the trouble for typical histogram sizes. + */ + for (i = 1; i < nvalues; i++) + { + ltcmp = DatumGetBool(FunctionCall2(&opproc, + values[i], + value)); + if (isgt) + ltcmp = !ltcmp; + if (!ltcmp) + break; + } + if (i >= nvalues) + { + /* Constant is above upper histogram boundary. */ + histfrac = 1.0; + } + else + { + double val, + high, + low; + double binfrac; + /* + * We have values[i-1] < constant < values[i]. + * + * Convert the constant and the two nearest bin boundary + * values to a uniform comparison scale, and do a linear + * interpolation within this bin. + */ + if (convert_to_scalar(value, contype, &val, + values[i-1], values[i], typid, + &low, &high)) + { + if (high <= low) + { + /* cope if bin boundaries appear identical */ + binfrac = 0.5; + } + else if (val <= low) + binfrac = 0.0; + else if (val >= high) + binfrac = 1.0; + else + binfrac = (val - low) / (high - low); + } + else + { + /* + * Ideally we'd produce an error here, on the grounds + * that the given operator shouldn't have scalarltsel + * registered as its selectivity func unless we can + * deal with its operand types. But currently, all + * manner of stuff is invoking scalarltsel, so give a + * default estimate until that can be fixed. + */ + binfrac = 0.5; + } + /* + * Now, compute the overall selectivity across the values + * represented by the histogram. We have i-1 full bins + * and binfrac partial bin below the constant. + */ + histfrac = (double) (i-1) + binfrac; + histfrac /= (double) (nvalues - 1); + } + } /* - * If we trusted the stats fully, we could return a small or - * large selec depending on which side of the single data - * point the constant is on. But it seems better to assume - * that the stats are wrong and return a default... + * Now histfrac = fraction of histogram entries below the constant. + * + * Account for "<" vs ">" */ - result = DEFAULT_INEQ_SEL; - } - else if (val < low || val > high) - { - + hist_selec = isgt ? (1.0 - histfrac) : histfrac; /* - * If given value is outside the statistical range, return a - * small or large value; but not 0.0/1.0 since there is a - * chance the stats are out of date. + * The histogram boundaries are only approximate to begin + * with, and may well be out of date anyway. Therefore, + * don't believe extremely small or large selectivity + * estimates. */ - if (flag & SEL_RIGHT) - result = (val < low) ? 0.001 : 0.999; - else - result = (val < low) ? 0.999 : 0.001; - } - else - { - denominator = high - low; - if (flag & SEL_RIGHT) - numerator = val - low; - else - numerator = high - val; - result = numerator / denominator; + if (hist_selec < 0.001) + hist_selec = 0.001; + else if (hist_selec > 0.999) + hist_selec = 0.999; } + + free_attstatsslot(typid, values, nvalues, NULL, 0); } - PG_RETURN_FLOAT8(result); + + /* + * Now merge the results from the MCV and histogram calculations, + * realizing that the histogram covers only the non-null values that + * are not listed in MCV. + */ + selec = 1.0 - stats->stanullfrac - sumcommon; + + if (hist_selec > 0.0) + selec *= hist_selec; + else + { + /* + * If no histogram but there are values not accounted for by MCV, + * arbitrarily assume half of them will match. + */ + selec *= 0.5; + } + + selec += mcv_selec; + + ReleaseSysCache(statsTuple); + + /* result should be in range, but make sure... */ + if (selec < 0.0) + selec = 0.0; + else if (selec > 1.0) + selec = 1.0; + + PG_RETURN_FLOAT8((float8) selec); } /* @@ -428,34 +602,25 @@ scalargtsel(PG_FUNCTION_ARGS) Datum value = PG_GETARG_DATUM(3); int32 flag = PG_GETARG_INT32(4); Oid ltopid; - float8 result; /* - * Compute selectivity of "<", then invert --- but only if we were - * able to produce a non-default estimate. Note that we get the - * negator which strictly speaking means we are looking at "<=" for - * ">" or "<" for ">=". We assume this won't matter. + * Commute so that we have a "<" or "<=" operator, then apply + * scalarltsel. */ - ltopid = get_negator(opid); - if (ltopid) - { - result = DatumGetFloat8(DirectFunctionCall5(scalarltsel, - ObjectIdGetDatum(ltopid), - ObjectIdGetDatum(relid), - Int16GetDatum(attno), - value, - Int32GetDatum(flag))); - } - else + ltopid = get_commutator(opid); + if (!ltopid) { /* Use default selectivity (should we raise an error instead?) */ - result = DEFAULT_INEQ_SEL; + PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); } - if (result != DEFAULT_INEQ_SEL) - result = 1.0 - result; - - PG_RETURN_FLOAT8(result); + flag ^= SEL_RIGHT; + return DirectFunctionCall5(scalarltsel, + ObjectIdGetDatum(ltopid), + ObjectIdGetDatum(relid), + Int16GetDatum(attno), + value, + Int32GetDatum(flag)); } /* @@ -476,7 +641,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype) result = DEFAULT_MATCH_SEL; else { - HeapTuple oprtuple; + HeapTuple oprTuple; Oid ltype, rtype; char *patt; @@ -488,14 +653,14 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype) * Get left and right datatypes of the operator so we know what * type the attribute is. */ - oprtuple = SearchSysCache(OPEROID, + oprTuple = SearchSysCache(OPEROID, ObjectIdGetDatum(opid), 0, 0, 0); - if (!HeapTupleIsValid(oprtuple)) + if (!HeapTupleIsValid(oprTuple)) elog(ERROR, "patternsel: no tuple for operator %u", opid); - ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft; - rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright; - ReleaseSysCache(oprtuple); + ltype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprleft; + rtype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprright; + ReleaseSysCache(oprTuple); /* the right-hand const is type text for all supported operators */ Assert(rtype == TEXTOID); @@ -659,42 +824,88 @@ eqjoinsel(PG_FUNCTION_ARGS) AttrNumber attno1 = PG_GETARG_INT16(2); Oid relid2 = PG_GETARG_OID(3); AttrNumber attno2 = PG_GETARG_INT16(4); - float8 result; - float8 num1, - num2, - min; bool unknown1 = NONVALUE(relid1) || NONVALUE(attno1); bool unknown2 = NONVALUE(relid2) || NONVALUE(attno2); + double selec; if (unknown1 && unknown2) - result = DEFAULT_EQ_SEL; + selec = DEFAULT_EQ_SEL; else { - num1 = unknown1 ? 1.0 : get_attdispersion(relid1, attno1, 0.01); - num2 = unknown2 ? 1.0 : get_attdispersion(relid2, attno2, 0.01); + Oid typid1; + Oid typid2; + int32 typmod1; + int32 typmod2; + HeapTuple statsTuple1 = NULL; + HeapTuple statsTuple2 = NULL; + Form_pg_statistic stats1 = NULL; + Form_pg_statistic stats2 = NULL; + double nd1, + nd2; + + if (unknown1) + { + nd1 = 100.0; + } + else + { + /* get info about the attribute */ + getattproperties(relid1, attno1, &typid1, &typmod1); + + /* get stats for the attribute, if available */ + statsTuple1 = SearchSysCache(STATRELATT, + ObjectIdGetDatum(relid1), + Int16GetDatum(attno1), + 0, 0); + if (HeapTupleIsValid(statsTuple1)) + stats1 = (Form_pg_statistic) GETSTRUCT(statsTuple1); + + nd1 = get_att_numdistinct(relid1, attno1, typid1, stats1); + } + + if (unknown2) + { + nd2 = 100.0; + } + else + { + /* get info about the attribute */ + getattproperties(relid2, attno2, &typid2, &typmod2); + + /* get stats for the attribute, if available */ + statsTuple2 = SearchSysCache(STATRELATT, + ObjectIdGetDatum(relid2), + Int16GetDatum(attno2), + 0, 0); + if (HeapTupleIsValid(statsTuple2)) + stats2 = (Form_pg_statistic) GETSTRUCT(statsTuple2); + + nd2 = get_att_numdistinct(relid2, attno2, typid2, stats2); + } /* - * The join selectivity cannot be more than num2, since each tuple - * in table 1 could match no more than num2 fraction of tuples in - * table 2 (and that's only if the table-1 tuple matches the most - * common value in table 2, so probably it's less). By the same - * reasoning it is not more than num1. The min is therefore an - * upper bound. + * Estimate the join selectivity as 1 / sqrt(nd1*nd2) + * (can we produce any theory for this)? * - * If we know the dispersion of only one side, use it; the reasoning - * above still works. + * XXX possibility to do better: if both attributes have histograms + * then we could determine the exact join selectivity between the + * MCV sets, and only have to assume the join behavior of the non-MCV + * values. This could be a big win when the MCVs cover a large part + * of the population. * - * XXX can we make a better estimate here? Using the nullfrac - * statistic might be helpful, for example. Assuming the operator - * is strict (does not succeed for null inputs) then the - * selectivity couldn't be more than (1-nullfrac1)*(1-nullfrac2), - * which might be usefully small if there are many nulls. How - * about applying the operator to the most common values? + * XXX what about nulls? */ - min = (num1 < num2) ? num1 : num2; - result = min; + selec = 1.0 / sqrt(nd1 * nd2); + if (selec > 1.0) + selec = 1.0; + + if (HeapTupleIsValid(statsTuple1)) + ReleaseSysCache(statsTuple1); + if (HeapTupleIsValid(statsTuple2)) + ReleaseSysCache(statsTuple2); + } - PG_RETURN_FLOAT8(result); + PG_RETURN_FLOAT8((float8) selec); } /* @@ -829,7 +1040,8 @@ icnlikejoinsel(PG_FUNCTION_ARGS) * Returns "true" if successful. * * All numeric datatypes are simply converted to their equivalent - * "double" values. + * "double" values. XXX what about NUMERIC values that are outside + * the range of "double"? * * String datatypes are converted by convert_string_to_scalar(), * which is explained below. The reason why this routine deals with @@ -917,7 +1129,7 @@ convert_numeric_to_scalar(Datum value, Oid typid) { switch (typid) { - case BOOLOID: + case BOOLOID: return (double) DatumGetBool(value); case INT2OID: return (double) DatumGetInt16(value); @@ -963,6 +1175,8 @@ convert_numeric_to_scalar(Datum value, Oid typid) * three strings before computing the scaled values. This allows us to * "zoom in" when we encounter a narrow data range. An example is a phone * number database where all the values begin with the same area code. + * (Actually, the bounds will be adjacent histogram-bin-boundary values, + * so this is more likely to happen than you might think.) */ static void convert_string_to_scalar(unsigned char *value, @@ -1208,11 +1422,11 @@ convert_timevalue_to_scalar(Datum value, Oid typid) /* * getattproperties * Retrieve pg_attribute properties for an attribute, - * including type OID, type len, type byval flag, typmod. + * including type OID and typmod. */ static void getattproperties(Oid relid, AttrNumber attnum, - Oid *typid, int *typlen, bool *typbyval, int32 *typmod) + Oid *typid, int32 *typmod) { HeapTuple atp; Form_pg_attribute att_tup; @@ -1227,164 +1441,87 @@ getattproperties(Oid relid, AttrNumber attnum, att_tup = (Form_pg_attribute) GETSTRUCT(atp); *typid = att_tup->atttypid; - *typlen = att_tup->attlen; - *typbyval = att_tup->attbyval; *typmod = att_tup->atttypmod; ReleaseSysCache(atp); } /* - * getattstatistics - * Retrieve the pg_statistic data for an attribute. - * Returns 'false' if no stats are available. + * get_att_numdistinct * - * Inputs: - * 'relid' and 'attnum' are the relation and attribute number. - * 'typid' and 'typmod' are the type and typmod of the column, - * which the caller must already have looked up. + * Estimate the number of distinct values of an attribute. * - * Outputs: - * The available stats are nullfrac, commonfrac, commonval, loval, hival. - * The caller need not retrieve all five --- pass NULL pointers for the - * unwanted values. + * relid, attnum: identify the attribute to examine. + * typid: type of attribute. + * stats: pg_statistic tuple for attribute, or NULL if not available. * - * commonval, loval, hival are returned as Datums holding the internal - * representation of the values. (Note that these should be pfree'd - * after use if the data type is not by-value.) + * XXX possible future improvement: look to see if there is a unique + * index on the attribute. If so, we can estimate ndistinct = ntuples. + * This should probably override any info from pg_statistic. */ -static bool -getattstatistics(Oid relid, - AttrNumber attnum, - Oid typid, - int32 typmod, - double *nullfrac, - double *commonfrac, - Datum *commonval, - Datum *loval, - Datum *hival) +static double +get_att_numdistinct(Oid relid, AttrNumber attnum, Oid typid, + Form_pg_statistic stats) { - HeapTuple tuple; - HeapTuple typeTuple; - FmgrInfo inputproc; - Oid typelem; - bool isnull; + HeapTuple reltup; + double ntuples; /* - * We assume that there will only be one entry in pg_statistic for the - * given rel/att, so we search WITHOUT considering the staop column. - * Someday, VACUUM might store more than one entry per rel/att, - * corresponding to more than one possible sort ordering defined for - * the column type. However, to make that work we will need to figure - * out which staop to search for --- it's not necessarily the one we - * have at hand! (For example, we might have a '>' operator rather - * than the '<' operator that will appear in staop.) + * Special-case boolean columns: presumably, two distinct values. + * + * Are there any other cases we should wire in special estimates for? */ - tuple = SearchSysCache(STATRELID, - ObjectIdGetDatum(relid), - Int16GetDatum((int16) attnum), - 0, 0); - if (!HeapTupleIsValid(tuple)) - { - /* no such stats entry */ - return false; - } + if (typid == BOOLOID) + return 2.0; - if (nullfrac) - *nullfrac = ((Form_pg_statistic) GETSTRUCT(tuple))->stanullfrac; - if (commonfrac) - *commonfrac = ((Form_pg_statistic) GETSTRUCT(tuple))->stacommonfrac; - - /* Get the type input proc for the column datatype */ - typeTuple = SearchSysCache(TYPEOID, - ObjectIdGetDatum(typid), - 0, 0, 0); - if (!HeapTupleIsValid(typeTuple)) - elog(ERROR, "getattstatistics: Cache lookup failed for type %u", - typid); - fmgr_info(((Form_pg_type) GETSTRUCT(typeTuple))->typinput, &inputproc); - typelem = ((Form_pg_type) GETSTRUCT(typeTuple))->typelem; - ReleaseSysCache(typeTuple); + /* + * If VACUUM ANALYZE determined a fixed estimate, use it. + */ + if (stats && stats->stadistinct > 0.0) + return stats->stadistinct; /* - * Values are variable-length fields, so cannot access as struct - * fields. Must do it the hard way with SysCacheGetAttr. + * Otherwise we need to get the relation size. */ - if (commonval) - { - Datum val = SysCacheGetAttr(STATRELID, tuple, - Anum_pg_statistic_stacommonval, - &isnull); + reltup = SearchSysCache(RELOID, + ObjectIdGetDatum(relid), + 0, 0, 0); + if (!HeapTupleIsValid(reltup)) + elog(ERROR, "get_att_numdistinct: no relation tuple %u", relid); - if (isnull) - { - elog(DEBUG, "getattstatistics: stacommonval is null"); - *commonval = PointerGetDatum(NULL); - } - else - { - char *strval = DatumGetCString(DirectFunctionCall1(textout, - val)); - - *commonval = FunctionCall3(&inputproc, - CStringGetDatum(strval), - ObjectIdGetDatum(typelem), - Int32GetDatum(typmod)); - pfree(strval); - } - } + ntuples = ((Form_pg_class) GETSTRUCT(reltup))->reltuples; - if (loval) - { - Datum val = SysCacheGetAttr(STATRELID, tuple, - Anum_pg_statistic_staloval, - &isnull); + ReleaseSysCache(reltup); - if (isnull) - { - elog(DEBUG, "getattstatistics: staloval is null"); - *loval = PointerGetDatum(NULL); - } - else - { - char *strval = DatumGetCString(DirectFunctionCall1(textout, - val)); - - *loval = FunctionCall3(&inputproc, - CStringGetDatum(strval), - ObjectIdGetDatum(typelem), - Int32GetDatum(typmod)); - pfree(strval); - } - } + if (ntuples <= 0.0) + return 100.0; /* no data available; return a default */ - if (hival) - { - Datum val = SysCacheGetAttr(STATRELID, tuple, - Anum_pg_statistic_stahival, - &isnull); + /* + * If VACUUM ANALYZE determined a scaled estimate, use it. + */ + if (stats && stats->stadistinct < 0.0) + return - stats->stadistinct * ntuples; - if (isnull) - { - elog(DEBUG, "getattstatistics: stahival is null"); - *hival = PointerGetDatum(NULL); - } - else - { - char *strval = DatumGetCString(DirectFunctionCall1(textout, - val)); - - *hival = FunctionCall3(&inputproc, - CStringGetDatum(strval), - ObjectIdGetDatum(typelem), - Int32GetDatum(typmod)); - pfree(strval); - } + /* + * VACUUM ANALYZE does not compute stats for system attributes, + * but some of them can reasonably be assumed unique anyway. + */ + switch (attnum) + { + case ObjectIdAttributeNumber: + case SelfItemPointerAttributeNumber: + return ntuples; + case TableOidAttributeNumber: + return 1.0; } - ReleaseSysCache(tuple); + /* + * Estimate ndistinct = ntuples if the table is small, else 100. + */ + if (ntuples < 100.0) + return ntuples; - return true; + return 100.0; } /*------------------------------------------------------------------------- diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 82d55866215..3995de5d7a1 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.52 2001/03/23 04:49:55 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.53 2001/05/07 00:43:24 tgl Exp $ * * NOTES * Eventually, the index information should go through here, too. @@ -18,7 +18,10 @@ #include "access/tupmacs.h" #include "catalog/pg_operator.h" #include "catalog/pg_proc.h" +#include "catalog/pg_statistic.h" #include "catalog/pg_type.h" +#include "utils/array.h" +#include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/syscache.h" @@ -182,106 +185,6 @@ get_atttypmod(Oid relid, AttrNumber attnum) return -1; } -/* - * get_attdispersion - * - * Retrieve the dispersion statistic for an attribute, - * or produce an estimate if no info is available. - * - * min_estimate is the minimum estimate to return if insufficient data - * is available to produce a reliable value. This value may vary - * depending on context. (For example, when deciding whether it is - * safe to use a hashjoin, we want to be more conservative than when - * estimating the number of tuples produced by an equijoin.) - */ -double -get_attdispersion(Oid relid, AttrNumber attnum, double min_estimate) -{ - HeapTuple atp; - Form_pg_attribute att_tup; - double dispersion; - Oid atttypid; - int32 ntuples; - - atp = SearchSysCache(ATTNUM, - ObjectIdGetDatum(relid), - Int16GetDatum(attnum), - 0, 0); - if (!HeapTupleIsValid(atp)) - { - /* this should not happen */ - elog(ERROR, "get_attdispersion: no attribute tuple %u %d", - relid, attnum); - return min_estimate; - } - - att_tup = (Form_pg_attribute) GETSTRUCT(atp); - - dispersion = att_tup->attdispersion; - atttypid = att_tup->atttypid; - - ReleaseSysCache(atp); - - if (dispersion > 0.0) - return dispersion; /* we have a specific estimate from VACUUM */ - - /* - * Special-case boolean columns: the dispersion of a boolean is highly - * unlikely to be anywhere near 1/numtuples, instead it's probably - * more like 0.5. - * - * Are there any other cases we should wire in special estimates for? - */ - if (atttypid == BOOLOID) - return 0.5; - - /* - * Dispersion is either 0 (no data available) or -1 (dispersion is - * 1/numtuples). Either way, we need the relation size. - */ - - atp = SearchSysCache(RELOID, - ObjectIdGetDatum(relid), - 0, 0, 0); - if (!HeapTupleIsValid(atp)) - { - /* this should not happen */ - elog(ERROR, "get_attdispersion: no relation tuple %u", relid); - return min_estimate; - } - - ntuples = ((Form_pg_class) GETSTRUCT(atp))->reltuples; - - ReleaseSysCache(atp); - - if (ntuples == 0) - return min_estimate; /* no data available */ - - if (dispersion < 0.0) /* VACUUM thinks there are no duplicates */ - return 1.0 / (double) ntuples; - - /* - * VACUUM ANALYZE does not compute dispersion for system attributes, - * but some of them can reasonably be assumed unique anyway. - */ - if (attnum == ObjectIdAttributeNumber || - attnum == SelfItemPointerAttributeNumber) - return 1.0 / (double) ntuples; - if (attnum == TableOidAttributeNumber) - return 1.0; - - /* - * VACUUM ANALYZE has not been run for this table. Produce an estimate - * of 1/numtuples. This may produce unreasonably small estimates for - * large tables, so limit the estimate to no less than min_estimate. - */ - dispersion = 1.0 / (double) ntuples; - if (dispersion < min_estimate) - dispersion = min_estimate; - - return dispersion; -} - /* ---------- INDEX CACHE ---------- */ /* watch this space... @@ -876,3 +779,157 @@ get_typtype(Oid typid) } #endif + +/* ---------- STATISTICS CACHE ---------- */ + +/* + * get_attstatsslot + * + * Extract the contents of a "slot" of a pg_statistic tuple. + * Returns TRUE if requested slot type was found, else FALSE. + * + * Unlike other routines in this file, this takes a pointer to an + * already-looked-up tuple in the pg_statistic cache. We do this since + * most callers will want to extract more than one value from the cache + * entry, and we don't want to repeat the cache lookup unnecessarily. + * + * statstuple: pg_statistics tuple to be examined. + * atttype: type OID of attribute. + * atttypmod: typmod of attribute. + * reqkind: STAKIND code for desired statistics slot kind. + * reqop: STAOP value wanted, or InvalidOid if don't care. + * values, nvalues: if not NULL, the slot's stavalues are extracted. + * numbers, nnumbers: if not NULL, the slot's stanumbers are extracted. + * + * If assigned, values and numbers are set to point to palloc'd arrays. + * If the attribute type is pass-by-reference, the values referenced by + * the values array are themselves palloc'd. The palloc'd stuff can be + * freed by calling free_attstatsslot. + */ +bool +get_attstatsslot(HeapTuple statstuple, + Oid atttype, int32 atttypmod, + int reqkind, Oid reqop, + Datum **values, int *nvalues, + float4 **numbers, int *nnumbers) +{ + Form_pg_statistic stats = (Form_pg_statistic) GETSTRUCT(statstuple); + int i, + j; + Datum val; + bool isnull; + ArrayType *statarray; + int narrayelem; + HeapTuple typeTuple; + FmgrInfo inputproc; + Oid typelem; + + for (i = 0; i < STATISTIC_NUM_SLOTS; i++) + { + if ((&stats->stakind1)[i] == reqkind && + (reqop == InvalidOid || (&stats->staop1)[i] == reqop)) + break; + } + if (i >= STATISTIC_NUM_SLOTS) + return false; /* not there */ + + if (values) + { + val = SysCacheGetAttr(STATRELATT, statstuple, + Anum_pg_statistic_stavalues1 + i, + &isnull); + if (isnull) + elog(ERROR, "get_attstatsslot: stavalues is null"); + statarray = DatumGetArrayTypeP(val); + /* + * Do initial examination of the array. This produces a list + * of text Datums --- ie, pointers into the text array value. + */ + deconstruct_array(statarray, false, -1, 'i', values, nvalues); + narrayelem = *nvalues; + /* + * We now need to replace each text Datum by its internal equivalent. + * + * Get the type input proc and typelem for the column datatype. + */ + typeTuple = SearchSysCache(TYPEOID, + ObjectIdGetDatum(atttype), + 0, 0, 0); + if (!HeapTupleIsValid(typeTuple)) + elog(ERROR, "get_attstatsslot: Cache lookup failed for type %u", + atttype); + fmgr_info(((Form_pg_type) GETSTRUCT(typeTuple))->typinput, &inputproc); + typelem = ((Form_pg_type) GETSTRUCT(typeTuple))->typelem; + ReleaseSysCache(typeTuple); + /* + * Do the conversions. The palloc'd array of Datums is reused + * in place. + */ + for (j = 0; j < narrayelem; j++) + { + char *strval; + + strval = DatumGetCString(DirectFunctionCall1(textout, + (*values)[j])); + (*values)[j] = FunctionCall3(&inputproc, + CStringGetDatum(strval), + ObjectIdGetDatum(typelem), + Int32GetDatum(atttypmod)); + pfree(strval); + } + /* + * Free statarray if it's a detoasted copy. + */ + if ((Pointer) statarray != DatumGetPointer(val)) + pfree(statarray); + } + + if (numbers) + { + val = SysCacheGetAttr(STATRELATT, statstuple, + Anum_pg_statistic_stanumbers1 + i, + &isnull); + if (isnull) + elog(ERROR, "get_attstatsslot: stanumbers is null"); + statarray = DatumGetArrayTypeP(val); + /* + * We expect the array to be a 1-D float4 array; verify that. + * We don't need to use deconstruct_array() since the array + * data is just going to look like a C array of float4 values. + */ + narrayelem = ARR_DIMS(statarray)[0]; + if (ARR_NDIM(statarray) != 1 || narrayelem <= 0 || + ARR_SIZE(statarray) != (ARR_OVERHEAD(1) + narrayelem * sizeof(float4))) + elog(ERROR, "get_attstatsslot: stanumbers is bogus"); + *numbers = (float4 *) palloc(narrayelem * sizeof(float4)); + memcpy(*numbers, ARR_DATA_PTR(statarray), narrayelem * sizeof(float4)); + *nnumbers = narrayelem; + /* + * Free statarray if it's a detoasted copy. + */ + if ((Pointer) statarray != DatumGetPointer(val)) + pfree(statarray); + } + + return true; +} + +void +free_attstatsslot(Oid atttype, + Datum *values, int nvalues, + float4 *numbers, int nnumbers) +{ + if (values) + { + if (! get_typbyval(atttype)) + { + int i; + + for (i = 0; i < nvalues; i++) + pfree(DatumGetPointer(values[i])); + } + pfree(values); + } + if (numbers) + pfree(numbers); +} diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index 75ef3179202..4e35b3fb35b 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/cache/syscache.c,v 1.60 2001/03/22 03:59:57 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/cache/syscache.c,v 1.61 2001/05/07 00:43:24 tgl Exp $ * * NOTES * These routines allow the parser/planner/executor to perform @@ -313,7 +313,7 @@ static struct cachedesc cacheinfo[] = { 0, 0 }}, - {StatisticRelationName, /* STATRELID */ + {StatisticRelationName, /* STATRELATT */ StatisticRelidAttnumIndex, 2, { diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index d27bfb29668..5a77c47c200 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -78,7 +78,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v 1.15 2001/03/23 04:49:55 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v 1.16 2001/05/07 00:43:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -87,7 +87,11 @@ #include "access/heapam.h" #include "access/nbtree.h" +#include "catalog/catname.h" +#include "catalog/pg_amop.h" +#include "catalog/pg_amproc.h" #include "miscadmin.h" +#include "utils/fmgroids.h" #include "utils/logtape.h" #include "utils/lsyscache.h" #include "utils/tuplesort.h" @@ -263,6 +267,7 @@ struct Tuplesortstate TupleDesc tupDesc; int nKeys; ScanKey scanKeys; + SortFunctionKind *sortFnKinds; /* * These variables are specific to the IndexTuple case; they are set @@ -279,6 +284,7 @@ struct Tuplesortstate Oid datumType; Oid sortOperator; FmgrInfo sortOpFn; /* cached lookup data for sortOperator */ + SortFunctionKind sortFnKind; /* we need typelen and byval in order to know how to copy the Datums. */ int datumTypeLen; bool datumTypeByVal; @@ -458,14 +464,14 @@ tuplesort_begin_common(bool randomAccess) Tuplesortstate * tuplesort_begin_heap(TupleDesc tupDesc, - int nkeys, ScanKey keys, + int nkeys, + Oid *sortOperators, AttrNumber *attNums, bool randomAccess) { Tuplesortstate *state = tuplesort_begin_common(randomAccess); + int i; - AssertArg(nkeys >= 1); - AssertArg(keys[0].sk_attno != 0); - AssertArg(keys[0].sk_procedure != 0); + AssertArg(nkeys > 0); state->comparetup = comparetup_heap; state->copytup = copytup_heap; @@ -475,7 +481,29 @@ tuplesort_begin_heap(TupleDesc tupDesc, state->tupDesc = tupDesc; state->nKeys = nkeys; - state->scanKeys = keys; + state->scanKeys = (ScanKey) palloc(nkeys * sizeof(ScanKeyData)); + MemSet(state->scanKeys, 0, nkeys * sizeof(ScanKeyData)); + state->sortFnKinds = (SortFunctionKind *) + palloc(nkeys * sizeof(SortFunctionKind)); + MemSet(state->sortFnKinds, 0, nkeys * sizeof(SortFunctionKind)); + + for (i = 0; i < nkeys; i++) + { + RegProcedure sortFunction; + + AssertArg(sortOperators[i] != 0); + AssertArg(attNums[i] != 0); + + /* select a function that implements the sort operator */ + SelectSortFunction(sortOperators[i], &sortFunction, + &state->sortFnKinds[i]); + + ScanKeyEntryInitialize(&state->scanKeys[i], + 0x0, + attNums[i], + sortFunction, + (Datum) 0); + } return state; } @@ -507,6 +535,7 @@ tuplesort_begin_datum(Oid datumType, bool randomAccess) { Tuplesortstate *state = tuplesort_begin_common(randomAccess); + RegProcedure sortFunction; int16 typlen; bool typbyval; @@ -518,8 +547,12 @@ tuplesort_begin_datum(Oid datumType, state->datumType = datumType; state->sortOperator = sortOperator; - /* lookup the function that implements the sort operator */ - fmgr_info(get_opcode(sortOperator), &state->sortOpFn); + + /* select a function that implements the sort operator */ + SelectSortFunction(sortOperator, &sortFunction, &state->sortFnKind); + /* and look up the function */ + fmgr_info(sortFunction, &state->sortOpFn); + /* lookup necessary attributes of the datum type */ get_typlenbyval(datumType, &typlen, &typbyval); state->datumTypeLen = typlen; @@ -548,6 +581,13 @@ tuplesort_end(Tuplesortstate *state) } if (state->memtupindex) pfree(state->memtupindex); + + /* this stuff might better belong in a variant-specific shutdown routine */ + if (state->scanKeys) + pfree(state->scanKeys); + if (state->sortFnKinds) + pfree(state->sortFnKinds); + pfree(state); } @@ -1692,6 +1732,7 @@ comparetup_heap(Tuplesortstate *state, const void *a, const void *b) for (nkey = 0; nkey < state->nKeys; nkey++) { ScanKey scanKey = state->scanKeys + nkey; + SortFunctionKind fnKind = state->sortFnKinds[nkey]; AttrNumber attno = scanKey->sk_attno; Datum lattr, rattr; @@ -1708,23 +1749,36 @@ comparetup_heap(Tuplesortstate *state, const void *a, const void *b) } else if (isnull2) return -1; - else if (scanKey->sk_flags & SK_COMMUTE) - { - if (DatumGetBool(FunctionCall2(&scanKey->sk_func, - rattr, lattr))) - return -1; /* a < b after commute */ - if (DatumGetBool(FunctionCall2(&scanKey->sk_func, - lattr, rattr))) - return 1; /* a > b after commute */ - } else { - if (DatumGetBool(FunctionCall2(&scanKey->sk_func, - lattr, rattr))) - return -1; /* a < b */ - if (DatumGetBool(FunctionCall2(&scanKey->sk_func, - rattr, lattr))) - return 1; /* a > b */ + int32 compare; + + if (fnKind == SORTFUNC_LT) + { + if (DatumGetBool(FunctionCall2(&scanKey->sk_func, + lattr, rattr))) + compare = -1; /* a < b */ + else if (DatumGetBool(FunctionCall2(&scanKey->sk_func, + rattr, lattr))) + compare = 1; /* a > b */ + else + compare = 0; + } + else + { + /* sort function is CMP or REVCMP */ + compare = DatumGetInt32(FunctionCall2(&scanKey->sk_func, + lattr, rattr)); + if (fnKind == SORTFUNC_REVCMP) + compare = -compare; + } + + if (compare != 0) + { + if (scanKey->sk_flags & SK_COMMUTE) + compare = -compare; + return compare; + } } } @@ -1852,8 +1906,10 @@ comparetup_index(Tuplesortstate *state, const void *a, const void *b) } else { + /* the comparison function is always of CMP type */ compare = DatumGetInt32(FunctionCall2(&entry->sk_func, - attrDatum1, attrDatum2)); + attrDatum1, + attrDatum2)); } if (compare != 0) @@ -1954,7 +2010,7 @@ comparetup_datum(Tuplesortstate *state, const void *a, const void *b) } else if (rtup->isNull) return -1; - else + else if (state->sortFnKind == SORTFUNC_LT) { if (DatumGetBool(FunctionCall2(&state->sortOpFn, ltup->val, rtup->val))) @@ -1964,6 +2020,17 @@ comparetup_datum(Tuplesortstate *state, const void *a, const void *b) return 1; /* a > b */ return 0; } + else + { + /* sort function is CMP or REVCMP */ + int32 compare; + + compare = DatumGetInt32(FunctionCall2(&state->sortOpFn, + ltup->val, rtup->val)); + if (state->sortFnKind == SORTFUNC_REVCMP) + compare = -compare; + return compare; + } } static void * @@ -2032,3 +2099,119 @@ tuplesize_datum(Tuplesortstate *state, void *tup) return (unsigned int) tuplelen; } } + + +/* + * This routine selects an appropriate sorting function to implement + * a sort operator as efficiently as possible. The straightforward + * method is to use the operator's implementation proc --- ie, "<" + * comparison. However, that way often requires two calls of the function + * per comparison. If we can find a btree three-way comparator function + * associated with the operator, we can use it to do the comparisons + * more efficiently. We also support the possibility that the operator + * is ">" (descending sort), in which case we have to reverse the output + * of the btree comparator. + * + * Possibly this should live somewhere else (backend/catalog/, maybe?). + */ +void +SelectSortFunction(Oid sortOperator, + RegProcedure *sortFunction, + SortFunctionKind *kind) +{ + Relation relation; + HeapScanDesc scan; + ScanKeyData skey[3]; + HeapTuple tuple; + Oid opclass = InvalidOid; + + /* + * Scan pg_amop to see if the target operator is registered as the + * "<" or ">" operator of any btree opclass. It's possible that it + * might be registered both ways (eg, if someone were to build a + * "reverse sort" opclass for some reason); prefer the "<" case if so. + * If the operator is registered the same way in multiple opclasses, + * assume we can use the associated comparator function from any one. + */ + relation = heap_openr(AccessMethodOperatorRelationName, + AccessShareLock); + + ScanKeyEntryInitialize(&skey[0], 0, + Anum_pg_amop_amopid, + F_OIDEQ, + ObjectIdGetDatum(BTREE_AM_OID)); + + ScanKeyEntryInitialize(&skey[1], 0, + Anum_pg_amop_amopopr, + F_OIDEQ, + ObjectIdGetDatum(sortOperator)); + + scan = heap_beginscan(relation, false, SnapshotNow, 2, skey); + + while (HeapTupleIsValid(tuple = heap_getnext(scan, 0))) + { + Form_pg_amop aform = (Form_pg_amop) GETSTRUCT(tuple); + + if (aform->amopstrategy == BTLessStrategyNumber) + { + opclass = aform->amopclaid; + *kind = SORTFUNC_CMP; + break; /* done looking */ + } + else if (aform->amopstrategy == BTGreaterStrategyNumber) + { + opclass = aform->amopclaid; + *kind = SORTFUNC_REVCMP; + /* keep scanning in hopes of finding a BTLess entry */ + } + } + + heap_endscan(scan); + heap_close(relation, AccessShareLock); + + if (OidIsValid(opclass)) + { + /* Found a suitable opclass, get its comparator support function */ + relation = heap_openr(AccessMethodProcedureRelationName, + AccessShareLock); + + ScanKeyEntryInitialize(&skey[0], 0, + Anum_pg_amproc_amid, + F_OIDEQ, + ObjectIdGetDatum(BTREE_AM_OID)); + + ScanKeyEntryInitialize(&skey[1], 0, + Anum_pg_amproc_amopclaid, + F_OIDEQ, + ObjectIdGetDatum(opclass)); + + ScanKeyEntryInitialize(&skey[2], 0, + Anum_pg_amproc_amprocnum, + F_INT2EQ, + Int16GetDatum(BTORDER_PROC)); + + scan = heap_beginscan(relation, false, SnapshotNow, 3, skey); + + *sortFunction = InvalidOid; + + if (HeapTupleIsValid(tuple = heap_getnext(scan, 0))) + { + Form_pg_amproc aform = (Form_pg_amproc) GETSTRUCT(tuple); + *sortFunction = aform->amproc; + } + + heap_endscan(scan); + heap_close(relation, AccessShareLock); + + if (RegProcedureIsValid(*sortFunction)) + return; + } + + /* Can't find a comparator, so use the operator as-is */ + + *kind = SORTFUNC_LT; + *sortFunction = get_opcode(sortOperator); + if (!RegProcedureIsValid(*sortFunction)) + elog(ERROR, "SelectSortFunction: operator %u has no implementation", + sortOperator); +} diff --git a/src/include/access/tuptoaster.h b/src/include/access/tuptoaster.h index 759ab3d39e2..6e38529204d 100644 --- a/src/include/access/tuptoaster.h +++ b/src/include/access/tuptoaster.h @@ -6,15 +6,13 @@ * * Copyright (c) 2000, PostgreSQL Development Team * - * $Id: tuptoaster.h,v 1.10 2001/03/22 04:00:32 momjian Exp $ + * $Id: tuptoaster.h,v 1.11 2001/05/07 00:43:24 tgl Exp $ * *------------------------------------------------------------------------- */ #ifndef TUPTOASTER_H #define TUPTOASTER_H -#ifdef TUPLE_TOASTER_ACTIVE - #include "access/heapam.h" #include "access/htup.h" #include "access/tupmacs.h" @@ -109,7 +107,13 @@ extern varattrib *heap_tuple_untoast_attr(varattrib *attr); */ extern Datum toast_compress_datum(Datum value); -#endif /* TUPLE_TOASTER_ACTIVE */ +/* ---------- + * toast_raw_datum_size - + * + * Return the raw (detoasted) size of a varlena datum + * ---------- + */ +extern Size toast_raw_datum_size(Datum value); #endif /* TUPTOASTER_H */ diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 963b11c1d38..832f91fb09f 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -37,7 +37,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: catversion.h,v 1.70 2001/03/22 04:00:35 momjian Exp $ + * $Id: catversion.h,v 1.71 2001/05/07 00:43:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 200101061 +#define CATALOG_VERSION_NO 200105051 #endif diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h index a7248f6c6dc..7ab04b05fb2 100644 --- a/src/include/catalog/heap.h +++ b/src/include/catalog/heap.h @@ -7,13 +7,14 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: heap.h,v 1.34 2001/03/22 04:00:35 momjian Exp $ + * $Id: heap.h,v 1.35 2001/05/07 00:43:24 tgl Exp $ * *------------------------------------------------------------------------- */ #ifndef HEAP_H #define HEAP_H +#include "catalog/pg_attribute.h" #include "utils/rel.h" typedef struct RawColumnDefault @@ -44,4 +45,6 @@ extern void AddRelationRawConstraints(Relation rel, List *rawColDefaults, List *rawConstraints); +extern Form_pg_attribute SystemAttributeDefinition(AttrNumber attno); + #endif /* HEAP_H */ diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h index 1dac0bb1c31..07aaad61c79 100644 --- a/src/include/catalog/index.h +++ b/src/include/catalog/index.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: index.h,v 1.33 2001/03/22 04:00:35 momjian Exp $ + * $Id: index.h,v 1.34 2001/05/07 00:43:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -46,7 +46,7 @@ extern void FormIndexDatum(IndexInfo *indexInfo, Datum *datum, char *nullv); -extern void UpdateStats(Oid relid, long reltuples); +extern void UpdateStats(Oid relid, double reltuples); extern bool IndexesAreActive(Oid relid, bool comfirmCommitted); extern void setRelhasindex(Oid relid, bool hasindex); diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h index 41a580a3777..cc155cf1bbb 100644 --- a/src/include/catalog/indexing.h +++ b/src/include/catalog/indexing.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: indexing.h,v 1.48 2001/03/22 04:00:36 momjian Exp $ + * $Id: indexing.h,v 1.49 2001/05/07 00:43:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -171,7 +171,7 @@ DECLARE_UNIQUE_INDEX(pg_rewrite_rulename_index on pg_rewrite using btree(rulenam xDECLARE_UNIQUE_INDEX(pg_shadow_name_index on pg_shadow using btree(usename name_ops)); xDECLARE_UNIQUE_INDEX(pg_shadow_sysid_index on pg_shadow using btree(usesysid int4_ops)); */ -DECLARE_INDEX(pg_statistic_relid_att_index on pg_statistic using btree(starelid oid_ops, staattnum int2_ops)); +DECLARE_UNIQUE_INDEX(pg_statistic_relid_att_index on pg_statistic using btree(starelid oid_ops, staattnum int2_ops)); DECLARE_INDEX(pg_trigger_tgconstrname_index on pg_trigger using btree(tgconstrname name_ops)); DECLARE_INDEX(pg_trigger_tgconstrrelid_index on pg_trigger using btree(tgconstrrelid oid_ops)); DECLARE_INDEX(pg_trigger_tgrelid_index on pg_trigger using btree(tgrelid oid_ops)); diff --git a/src/include/catalog/pg_attribute.h b/src/include/catalog/pg_attribute.h index 58724e94dc9..6e11aa6d530 100644 --- a/src/include/catalog/pg_attribute.h +++ b/src/include/catalog/pg_attribute.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: pg_attribute.h,v 1.70 2001/03/22 04:00:37 momjian Exp $ + * $Id: pg_attribute.h,v 1.71 2001/05/07 00:43:24 tgl Exp $ * * NOTES * the genbki.sh script reads this file and generates .bki @@ -36,15 +36,14 @@ * typedef struct FormData_pg_attribute * * If you change the following, make sure you change the structs for - * system attributes in heap.c and index.c also. + * system attributes in catalog/heap.c also. * ---------------- */ CATALOG(pg_attribute) BOOTSTRAP { Oid attrelid; /* OID of relation containing this * attribute */ - NameData attname; - Oid atttypid; + NameData attname; /* name of attribute */ /* * atttypid is the OID of the instance in Catalog Class pg_type that @@ -53,30 +52,20 @@ CATALOG(pg_attribute) BOOTSTRAP * attalign attributes of this instance, so they had better match or * Postgres will fail. */ - - float4 attdispersion; + Oid atttypid; /* - * attdispersion is the dispersion statistic of the column (0.0 to - * 1.0), or zero if the statistic has not been calculated, or -1.0 if - * VACUUM found that the column contains no duplicate entries (in - * which case the dispersion should be taken as 1.0/numberOfRows for - * the current table size). The -1.0 hack is useful because the - * number of rows may be updated more often than attdispersion is. We - * assume that the column will retain its no-duplicate-entry property. - * (Perhaps this should be driven off the existence of a UNIQUE index - * for the column, instead of being a statistical guess?) + * attstattarget is the target number of statistics datapoints to collect + * during VACUUM ANALYZE of this column. A zero here indicates that we + * do not wish to collect any stats about this column. */ - - int2 attlen; + int4 attstattarget; /* * attlen is a copy of the typlen field from pg_type for this - * attribute. See atttypid above. See struct Form_pg_type for - * definition. + * attribute. See atttypid comments above. */ - - int2 attnum; + int2 attlen; /* * attnum is the "attribute number" for the attribute: A value that @@ -91,10 +80,13 @@ CATALOG(pg_attribute) BOOTSTRAP * * Note that (attnum - 1) is often used as the index to an array. */ + int2 attnum; - int4 attnelems; /* number of dimensions, if an array type */ - - int4 attcacheoff; + /* + * attndims is the declared number of dimensions, if an array type, + * otherwise zero. + */ + int4 attndims; /* * fastgetattr() uses attcacheoff to cache byte offsets of attributes @@ -103,8 +95,7 @@ CATALOG(pg_attribute) BOOTSTRAP * tuple descriptor, we may then update attcacheoff in the copies. * This speeds up the attribute walking process. */ - - int4 atttypmod; + int4 attcacheoff; /* * atttypmod records type-specific data supplied at table creation @@ -113,16 +104,13 @@ CATALOG(pg_attribute) BOOTSTRAP * argument. The value will generally be -1 for types that do not need * typmod. */ - - bool attbyval; + int4 atttypmod; /* * attbyval is a copy of the typbyval field from pg_type for this - * attribute. See atttypid above. See struct Form_pg_type for - * definition. + * attribute. See atttypid comments above. */ - - char attstorage; + bool attbyval; /*---------- * attstorage tells for VARLENA attributes, what the heap access @@ -137,30 +125,31 @@ CATALOG(pg_attribute) BOOTSTRAP * but only as a last resort ('e' and 'x' fields are moved first). *---------- */ + char attstorage; + /* This flag indicates that the attribute is really a set */ bool attisset; - char attalign; /* * attalign is a copy of the typalign field from pg_type for this - * attribute. See atttypid above. See struct Form_pg_type for - * definition. + * attribute. See atttypid comments above. */ - - bool attnotnull; + char attalign; /* This flag represents the "NOT NULL" constraint */ - bool atthasdef; + bool attnotnull; /* Has DEFAULT value or not */ + bool atthasdef; } FormData_pg_attribute; /* * someone should figure out how to do this properly. (The problem is - * the size of the C struct is not the same as the size of the tuple.) + * the size of the C struct is not the same as the size of the tuple + * because of alignment padding at the end of the struct.) */ #define ATTRIBUTE_TUPLE_SIZE \ - (offsetof(FormData_pg_attribute,atthasdef) + sizeof(char)) + (offsetof(FormData_pg_attribute,atthasdef) + sizeof(bool)) /* ---------------- * Form_pg_attribute corresponds to a pointer to a tuple with @@ -178,10 +167,10 @@ typedef FormData_pg_attribute *Form_pg_attribute; #define Anum_pg_attribute_attrelid 1 #define Anum_pg_attribute_attname 2 #define Anum_pg_attribute_atttypid 3 -#define Anum_pg_attribute_attdispersion 4 +#define Anum_pg_attribute_attstattarget 4 #define Anum_pg_attribute_attlen 5 #define Anum_pg_attribute_attnum 6 -#define Anum_pg_attribute_attnelems 7 +#define Anum_pg_attribute_attndims 7 #define Anum_pg_attribute_attcacheoff 8 #define Anum_pg_attribute_atttypmod 9 #define Anum_pg_attribute_attbyval 10 @@ -206,6 +195,7 @@ typedef FormData_pg_attribute *Form_pg_attribute; (attribute)->attnotnull = false; \ (attribute)->atthasdef = false; #endif /* _DROP_COLUMN_HACK__ */ + /* ---------------- * SCHEMA_ macros for declaring hardcoded tuple descriptors. * these are used in utils/cache/relcache.c @@ -231,25 +221,25 @@ typedef FormData_pg_attribute *Form_pg_attribute; * ---------------- */ #define Schema_pg_type \ -{ 1247, {"typname"}, 19, 0, NAMEDATALEN, 1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1247, {"typowner"}, 23, 0, 4, 2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1247, {"typlen"}, 21, 0, 2, 3, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \ -{ 1247, {"typprtlen"}, 21, 0, 2, 4, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \ -{ 1247, {"typbyval"}, 16, 0, 1, 5, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \ -{ 1247, {"typtype"}, 18, 0, 1, 6, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \ -{ 1247, {"typisdefined"}, 16, 0, 1, 7, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \ -{ 1247, {"typdelim"}, 18, 0, 1, 8, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \ -{ 1247, {"typrelid"}, 26, 0, 4, 9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1247, {"typelem"}, 26, 0, 4, 10, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1247, {"typinput"}, 24, 0, 4, 11, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1247, {"typoutput"}, 24, 0, 4, 12, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1247, {"typreceive"}, 24, 0, 4, 13, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1247, {"typsend"}, 24, 0, 4, 14, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1247, {"typalign"}, 18, 0, 1, 15, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \ -{ 1247, {"typstorage"}, 18, 0, 1, 16, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \ -{ 1247, {"typdefault"}, 25, 0, -1, 17, 0, -1, -1, '\0' , 'x', '\0', 'i', '\0', '\0' } - -DATA(insert OID = 0 ( 1247 typname 19 0 NAMEDATALEN 1 0 -1 -1 f p f i f f)); +{ 1247, {"typname"}, 19, DEFAULT_ATTSTATTARGET, NAMEDATALEN, 1, 0, -1, -1, false, 'p', false, 'i', false, false }, \ +{ 1247, {"typowner"}, 23, 0, 4, 2, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1247, {"typlen"}, 21, 0, 2, 3, 0, -1, -1, true, 'p', false, 's', false, false }, \ +{ 1247, {"typprtlen"}, 21, 0, 2, 4, 0, -1, -1, true, 'p', false, 's', false, false }, \ +{ 1247, {"typbyval"}, 16, 0, 1, 5, 0, -1, -1, true, 'p', false, 'c', false, false }, \ +{ 1247, {"typtype"}, 18, 0, 1, 6, 0, -1, -1, true, 'p', false, 'c', false, false }, \ +{ 1247, {"typisdefined"}, 16, 0, 1, 7, 0, -1, -1, true, 'p', false, 'c', false, false }, \ +{ 1247, {"typdelim"}, 18, 0, 1, 8, 0, -1, -1, true, 'p', false, 'c', false, false }, \ +{ 1247, {"typrelid"}, 26, 0, 4, 9, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1247, {"typelem"}, 26, 0, 4, 10, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1247, {"typinput"}, 24, 0, 4, 11, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1247, {"typoutput"}, 24, 0, 4, 12, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1247, {"typreceive"}, 24, 0, 4, 13, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1247, {"typsend"}, 24, 0, 4, 14, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1247, {"typalign"}, 18, 0, 1, 15, 0, -1, -1, true, 'p', false, 'c', false, false }, \ +{ 1247, {"typstorage"}, 18, 0, 1, 16, 0, -1, -1, true, 'p', false, 'c', false, false }, \ +{ 1247, {"typdefault"}, 25, 0, -1, 17, 0, -1, -1, false , 'x', false, 'i', false, false } + +DATA(insert OID = 0 ( 1247 typname 19 DEFAULT_ATTSTATTARGET NAMEDATALEN 1 0 -1 -1 f p f i f f)); DATA(insert OID = 0 ( 1247 typowner 23 0 4 2 0 -1 -1 t p f i f f)); DATA(insert OID = 0 ( 1247 typlen 21 0 2 3 0 -1 -1 t p f s f f)); DATA(insert OID = 0 ( 1247 typprtlen 21 0 2 4 0 -1 -1 t p f s f f)); @@ -299,25 +289,25 @@ DATA(insert OID = 0 ( 1262 tableoid 26 0 4 -7 0 -1 -1 t p f i f f)); * ---------------- */ #define Schema_pg_proc \ -{ 1255, {"proname"}, 19, 0, NAMEDATALEN, 1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1255, {"proowner"}, 23, 0, 4, 2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1255, {"prolang"}, 26, 0, 4, 3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1255, {"proisinh"}, 16, 0, 1, 4, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \ -{ 1255, {"proistrusted"}, 16, 0, 1, 5, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \ -{ 1255, {"proiscachable"}, 16, 0, 1, 6, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \ -{ 1255, {"proisstrict"}, 16, 0, 1, 7, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \ -{ 1255, {"pronargs"}, 21, 0, 2, 8, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \ -{ 1255, {"proretset"}, 16, 0, 1, 9, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \ -{ 1255, {"prorettype"}, 26, 0, 4, 10, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1255, {"proargtypes"}, 30, 0, INDEX_MAX_KEYS*4, 11, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1255, {"probyte_pct"}, 23, 0, 4, 12, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1255, {"properbyte_cpu"}, 23, 0, 4, 13, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1255, {"propercall_cpu"}, 23, 0, 4, 14, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1255, {"prooutin_ratio"}, 23, 0, 4, 15, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1255, {"prosrc"}, 25, 0, -1, 16, 0, -1, -1, '\0', 'x', '\0', 'i', '\0', '\0' }, \ -{ 1255, {"probin"}, 17, 0, -1, 17, 0, -1, -1, '\0', 'x', '\0', 'i', '\0', '\0' } - -DATA(insert OID = 0 ( 1255 proname 19 0 NAMEDATALEN 1 0 -1 -1 f p f i f f)); +{ 1255, {"proname"}, 19, DEFAULT_ATTSTATTARGET, NAMEDATALEN, 1, 0, -1, -1, false, 'p', false, 'i', false, false }, \ +{ 1255, {"proowner"}, 23, 0, 4, 2, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1255, {"prolang"}, 26, 0, 4, 3, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1255, {"proisinh"}, 16, 0, 1, 4, 0, -1, -1, true, 'p', false, 'c', false, false }, \ +{ 1255, {"proistrusted"}, 16, 0, 1, 5, 0, -1, -1, true, 'p', false, 'c', false, false }, \ +{ 1255, {"proiscachable"}, 16, 0, 1, 6, 0, -1, -1, true, 'p', false, 'c', false, false }, \ +{ 1255, {"proisstrict"}, 16, 0, 1, 7, 0, -1, -1, true, 'p', false, 'c', false, false }, \ +{ 1255, {"pronargs"}, 21, 0, 2, 8, 0, -1, -1, true, 'p', false, 's', false, false }, \ +{ 1255, {"proretset"}, 16, 0, 1, 9, 0, -1, -1, true, 'p', false, 'c', false, false }, \ +{ 1255, {"prorettype"}, 26, 0, 4, 10, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1255, {"proargtypes"}, 30, 0, INDEX_MAX_KEYS*4, 11, 0, -1, -1, false, 'p', false, 'i', false, false }, \ +{ 1255, {"probyte_pct"}, 23, 0, 4, 12, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1255, {"properbyte_cpu"}, 23, 0, 4, 13, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1255, {"propercall_cpu"}, 23, 0, 4, 14, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1255, {"prooutin_ratio"}, 23, 0, 4, 15, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1255, {"prosrc"}, 25, 0, -1, 16, 0, -1, -1, false, 'x', false, 'i', false, false }, \ +{ 1255, {"probin"}, 17, 0, -1, 17, 0, -1, -1, false, 'x', false, 'i', false, false } + +DATA(insert OID = 0 ( 1255 proname 19 DEFAULT_ATTSTATTARGET NAMEDATALEN 1 0 -1 -1 f p f i f f)); DATA(insert OID = 0 ( 1255 proowner 23 0 4 2 0 -1 -1 t p f i f f)); DATA(insert OID = 0 ( 1255 prolang 26 0 4 3 0 -1 -1 t p f i f f)); DATA(insert OID = 0 ( 1255 proisinh 16 0 1 4 0 -1 -1 t p f c f f)); @@ -346,8 +336,8 @@ DATA(insert OID = 0 ( 1255 tableoid 26 0 4 -7 0 -1 -1 t p f i f f)); * pg_shadow * ---------------- */ -DATA(insert OID = 0 ( 1260 usename 19 0 NAMEDATALEN 1 0 -1 -1 f p f i f f)); -DATA(insert OID = 0 ( 1260 usesysid 23 0 4 2 0 -1 -1 t p f i f f)); +DATA(insert OID = 0 ( 1260 usename 19 DEFAULT_ATTSTATTARGET NAMEDATALEN 1 0 -1 -1 f p f i f f)); +DATA(insert OID = 0 ( 1260 usesysid 23 DEFAULT_ATTSTATTARGET 4 2 0 -1 -1 t p f i f f)); DATA(insert OID = 0 ( 1260 usecreatedb 16 0 1 3 0 -1 -1 t p f c f f)); DATA(insert OID = 0 ( 1260 usetrace 16 0 1 4 0 -1 -1 t p f c f f)); DATA(insert OID = 0 ( 1260 usesuper 16 0 1 5 0 -1 -1 t p f c f f)); @@ -366,8 +356,8 @@ DATA(insert OID = 0 ( 1260 tableoid 26 0 4 -7 0 -1 -1 t p f i f f)); * pg_group * ---------------- */ -DATA(insert OID = 0 ( 1261 groname 19 0 NAMEDATALEN 1 0 -1 -1 f p f i f f)); -DATA(insert OID = 0 ( 1261 grosysid 23 0 4 2 0 -1 -1 t p f i f f)); +DATA(insert OID = 0 ( 1261 groname 19 DEFAULT_ATTSTATTARGET NAMEDATALEN 1 0 -1 -1 f p f i f f)); +DATA(insert OID = 0 ( 1261 grosysid 23 DEFAULT_ATTSTATTARGET 4 2 0 -1 -1 t p f i f f)); DATA(insert OID = 0 ( 1261 grolist 1007 0 -1 3 0 -1 -1 f x f i f f)); DATA(insert OID = 0 ( 1261 ctid 27 0 6 -1 0 -1 -1 f p f i f f)); DATA(insert OID = 0 ( 1261 oid 26 0 4 -2 0 -1 -1 t p f i f f)); @@ -382,29 +372,29 @@ DATA(insert OID = 0 ( 1261 tableoid 26 0 4 -7 0 -1 -1 t p f i f f)); * ---------------- */ #define Schema_pg_attribute \ -{ 1249, {"attrelid"}, 26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1249, {"attname"}, 19, 0, NAMEDATALEN, 2, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1249, {"atttypid"}, 26, 0, 4, 3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1249, {"attdispersion"}, 700, 0, 4, 4, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1249, {"attlen"}, 21, 0, 2, 5, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \ -{ 1249, {"attnum"}, 21, 0, 2, 6, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \ -{ 1249, {"attnelems"}, 23, 0, 4, 7, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1249, {"attcacheoff"}, 23, 0, 4, 8, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1249, {"atttypmod"}, 23, 0, 4, 9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1249, {"attbyval"}, 16, 0, 1, 10, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \ -{ 1249, {"attstorage"}, 18, 0, 1, 11, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \ -{ 1249, {"attisset"}, 16, 0, 1, 12, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \ -{ 1249, {"attalign"}, 18, 0, 1, 13, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \ -{ 1249, {"attnotnull"}, 16, 0, 1, 14, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \ -{ 1249, {"atthasdef"}, 16, 0, 1, 15, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' } - -DATA(insert OID = 0 ( 1249 attrelid 26 0 4 1 0 -1 -1 t p f i f f)); -DATA(insert OID = 0 ( 1249 attname 19 0 NAMEDATALEN 2 0 -1 -1 f p f i f f)); +{ 1249, {"attrelid"}, 26, DEFAULT_ATTSTATTARGET, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1249, {"attname"}, 19, DEFAULT_ATTSTATTARGET, NAMEDATALEN, 2, 0, -1, -1, false, 'p', false, 'i', false, false }, \ +{ 1249, {"atttypid"}, 26, 0, 4, 3, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1249, {"attstattarget"}, 23, 0, 4, 4, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1249, {"attlen"}, 21, 0, 2, 5, 0, -1, -1, true, 'p', false, 's', false, false }, \ +{ 1249, {"attnum"}, 21, 0, 2, 6, 0, -1, -1, true, 'p', false, 's', false, false }, \ +{ 1249, {"attndims"}, 23, 0, 4, 7, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1249, {"attcacheoff"}, 23, 0, 4, 8, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1249, {"atttypmod"}, 23, 0, 4, 9, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1249, {"attbyval"}, 16, 0, 1, 10, 0, -1, -1, true, 'p', false, 'c', false, false }, \ +{ 1249, {"attstorage"}, 18, 0, 1, 11, 0, -1, -1, true, 'p', false, 'c', false, false }, \ +{ 1249, {"attisset"}, 16, 0, 1, 12, 0, -1, -1, true, 'p', false, 'c', false, false }, \ +{ 1249, {"attalign"}, 18, 0, 1, 13, 0, -1, -1, true, 'p', false, 'c', false, false }, \ +{ 1249, {"attnotnull"}, 16, 0, 1, 14, 0, -1, -1, true, 'p', false, 'c', false, false }, \ +{ 1249, {"atthasdef"}, 16, 0, 1, 15, 0, -1, -1, true, 'p', false, 'c', false, false } + +DATA(insert OID = 0 ( 1249 attrelid 26 DEFAULT_ATTSTATTARGET 4 1 0 -1 -1 t p f i f f)); +DATA(insert OID = 0 ( 1249 attname 19 DEFAULT_ATTSTATTARGET NAMEDATALEN 2 0 -1 -1 f p f i f f)); DATA(insert OID = 0 ( 1249 atttypid 26 0 4 3 0 -1 -1 t p f i f f)); -DATA(insert OID = 0 ( 1249 attdispersion 700 0 4 4 0 -1 -1 f p f i f f)); +DATA(insert OID = 0 ( 1249 attstattarget 23 0 4 4 0 -1 -1 t p f i f f)); DATA(insert OID = 0 ( 1249 attlen 21 0 2 5 0 -1 -1 t p f s f f)); DATA(insert OID = 0 ( 1249 attnum 21 0 2 6 0 -1 -1 t p f s f f)); -DATA(insert OID = 0 ( 1249 attnelems 23 0 4 7 0 -1 -1 t p f i f f)); +DATA(insert OID = 0 ( 1249 attndims 23 0 4 7 0 -1 -1 t p f i f f)); DATA(insert OID = 0 ( 1249 attcacheoff 23 0 4 8 0 -1 -1 t p f i f f)); DATA(insert OID = 0 ( 1249 atttypmod 23 0 4 9 0 -1 -1 t p f i f f)); DATA(insert OID = 0 ( 1249 attbyval 16 0 1 10 0 -1 -1 t p f c f f)); @@ -426,36 +416,36 @@ DATA(insert OID = 0 ( 1249 tableoid 26 0 4 -7 0 -1 -1 t p f i f f)); * ---------------- */ #define Schema_pg_class \ -{ 1259, {"relname"}, 19, 0, NAMEDATALEN, 1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1259, {"reltype"}, 26, 0, 4, 2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1259, {"relowner"}, 23, 0, 4, 3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1259, {"relam"}, 26, 0, 4, 4, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1259, {"relfilenode"}, 26, 0, 4, 5, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1259, {"relpages"}, 23, 0, 4, 6, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1259, {"reltuples"}, 23, 0, 4, 7, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1259, {"reltoastrelid"}, 26, 0, 4, 8, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1259, {"reltoastidxid"}, 26, 0, 4, 9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \ -{ 1259, {"relhasindex"}, 16, 0, 1, 10, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \ -{ 1259, {"relisshared"}, 16, 0, 1, 11, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \ -{ 1259, {"relkind"}, 18, 0, 1, 12, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \ -{ 1259, {"relnatts"}, 21, 0, 2, 13, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \ -{ 1259, {"relchecks"}, 21, 0, 2, 14, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \ -{ 1259, {"reltriggers"}, 21, 0, 2, 15, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \ -{ 1259, {"relukeys"}, 21, 0, 2, 16, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \ -{ 1259, {"relfkeys"}, 21, 0, 2, 17, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \ -{ 1259, {"relrefs"}, 21, 0, 2, 18, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \ -{ 1259, {"relhaspkey"}, 16, 0, 1, 19, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \ -{ 1259, {"relhasrules"}, 16, 0, 1, 20, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \ -{ 1259, {"relhassubclass"},16, 0, 1, 21, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \ -{ 1259, {"relacl"}, 1034, 0, -1, 22, 0, -1, -1, '\0', 'x', '\0', 'i', '\0', '\0' } - -DATA(insert OID = 0 ( 1259 relname 19 0 NAMEDATALEN 1 0 -1 -1 f p f i f f)); +{ 1259, {"relname"}, 19, DEFAULT_ATTSTATTARGET, NAMEDATALEN, 1, 0, -1, -1, false, 'p', false, 'i', false, false }, \ +{ 1259, {"reltype"}, 26, 0, 4, 2, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1259, {"relowner"}, 23, 0, 4, 3, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1259, {"relam"}, 26, 0, 4, 4, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1259, {"relfilenode"}, 26, 0, 4, 5, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1259, {"relpages"}, 23, 0, 4, 6, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1259, {"reltuples"}, 700, 0, 4, 7, 0, -1, -1, false, 'p', false, 'i', false, false }, \ +{ 1259, {"reltoastrelid"}, 26, 0, 4, 8, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1259, {"reltoastidxid"}, 26, 0, 4, 9, 0, -1, -1, true, 'p', false, 'i', false, false }, \ +{ 1259, {"relhasindex"}, 16, 0, 1, 10, 0, -1, -1, true, 'p', false, 'c', false, false }, \ +{ 1259, {"relisshared"}, 16, 0, 1, 11, 0, -1, -1, true, 'p', false, 'c', false, false }, \ +{ 1259, {"relkind"}, 18, 0, 1, 12, 0, -1, -1, true, 'p', false, 'c', false, false }, \ +{ 1259, {"relnatts"}, 21, 0, 2, 13, 0, -1, -1, true, 'p', false, 's', false, false }, \ +{ 1259, {"relchecks"}, 21, 0, 2, 14, 0, -1, -1, true, 'p', false, 's', false, false }, \ +{ 1259, {"reltriggers"}, 21, 0, 2, 15, 0, -1, -1, true, 'p', false, 's', false, false }, \ +{ 1259, {"relukeys"}, 21, 0, 2, 16, 0, -1, -1, true, 'p', false, 's', false, false }, \ +{ 1259, {"relfkeys"}, 21, 0, 2, 17, 0, -1, -1, true, 'p', false, 's', false, false }, \ +{ 1259, {"relrefs"}, 21, 0, 2, 18, 0, -1, -1, true, 'p', false, 's', false, false }, \ +{ 1259, {"relhaspkey"}, 16, 0, 1, 19, 0, -1, -1, true, 'p', false, 'c', false, false }, \ +{ 1259, {"relhasrules"}, 16, 0, 1, 20, 0, -1, -1, true, 'p', false, 'c', false, false }, \ +{ 1259, {"relhassubclass"},16, 0, 1, 21, 0, -1, -1, true, 'p', false, 'c', false, false }, \ +{ 1259, {"relacl"}, 1034, 0, -1, 22, 0, -1, -1, false, 'x', false, 'i', false, false } + +DATA(insert OID = 0 ( 1259 relname 19 DEFAULT_ATTSTATTARGET NAMEDATALEN 1 0 -1 -1 f p f i f f)); DATA(insert OID = 0 ( 1259 reltype 26 0 4 2 0 -1 -1 t p f i f f)); DATA(insert OID = 0 ( 1259 relowner 23 0 4 3 0 -1 -1 t p f i f f)); DATA(insert OID = 0 ( 1259 relam 26 0 4 4 0 -1 -1 t p f i f f)); DATA(insert OID = 0 ( 1259 relfilenode 26 0 4 5 0 -1 -1 t p f i f f)); DATA(insert OID = 0 ( 1259 relpages 23 0 4 6 0 -1 -1 t p f i f f)); -DATA(insert OID = 0 ( 1259 reltuples 23 0 4 7 0 -1 -1 t p f i f f)); +DATA(insert OID = 0 ( 1259 reltuples 700 0 4 7 0 -1 -1 f p f i f f)); DATA(insert OID = 0 ( 1259 reltoastrelid 26 0 4 8 0 -1 -1 t p f i f f)); DATA(insert OID = 0 ( 1259 reltoastidxid 26 0 4 9 0 -1 -1 t p f i f f)); DATA(insert OID = 0 ( 1259 relhasindex 16 0 1 10 0 -1 -1 t p f c f f)); @@ -544,7 +534,7 @@ DATA(insert OID = 0 ( 1219 tableoid 26 0 4 -7 0 -1 -1 t p f i f f)); * ---------------- */ #define Schema_pg_variable \ -{ 1264, {"varfoo"}, 26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' } +{ 1264, {"varfoo"}, 26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false } DATA(insert OID = 0 ( 1264 varfoo 26 0 4 1 0 -1 -1 t p f i f f)); @@ -555,7 +545,7 @@ DATA(insert OID = 0 ( 1264 varfoo 26 0 4 1 0 -1 -1 t p f i f f)); * ---------------- */ #define Schema_pg_log \ -{ 1269, {"logfoo"}, 26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' } +{ 1269, {"logfoo"}, 26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false } DATA(insert OID = 0 ( 1269 logfoo 26 0 4 1 0 -1 -1 t p f i f f)); @@ -566,7 +556,7 @@ DATA(insert OID = 0 ( 1269 logfoo 26 0 4 1 0 -1 -1 t p f i f f)); * ---------------- */ #define Schema_pg_xactlock \ -{ 376, {"xactlockfoo"}, 26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' } +{ 376, {"xactlockfoo"}, 26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false } DATA(insert OID = 0 ( 376 xactlockfoo 26 0 4 1 0 -1 -1 t p f i f f)); diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h index 81e75e14b6a..86de88cc9b6 100644 --- a/src/include/catalog/pg_class.h +++ b/src/include/catalog/pg_class.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: pg_class.h,v 1.47 2001/03/22 04:00:38 momjian Exp $ + * $Id: pg_class.h,v 1.48 2001/05/07 00:43:25 tgl Exp $ * * NOTES * the genbki.sh script reads this file and generates .bki @@ -52,7 +52,7 @@ CATALOG(pg_class) BOOTSTRAP Oid relam; Oid relfilenode; int4 relpages; - int4 reltuples; + float4 reltuples; Oid reltoastrelid; Oid reltoastidxid; bool relhasindex; diff --git a/src/include/catalog/pg_statistic.h b/src/include/catalog/pg_statistic.h index 2f39bea3245..8d6a6b37c16 100644 --- a/src/include/catalog/pg_statistic.h +++ b/src/include/catalog/pg_statistic.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: pg_statistic.h,v 1.10 2001/01/24 19:43:22 momjian Exp $ + * $Id: pg_statistic.h,v 1.11 2001/05/07 00:43:25 tgl Exp $ * * NOTES * the genbki.sh script reads this file and generates .bki @@ -36,40 +36,91 @@ CATALOG(pg_statistic) /* These fields form the unique key for the entry: */ Oid starelid; /* relation containing attribute */ int2 staattnum; /* attribute (column) stats are for */ - Oid staop; /* '<' comparison op used for lo/hi vals */ + + /* the fraction of the column's entries that are NULL: */ + float4 stanullfrac; /* - * Note: the current VACUUM code will never produce more than one - * entry per column, but in theory there could be multiple entries if - * a datatype has more than one useful ordering operator. Also, the - * current code will not write an entry unless it found at least one - * non-NULL value in the column; so the remaining fields will never be - * NULL. + * stawidth is the average width in bytes of non-null entries. For + * fixed-width datatypes this is of course the same as the typlen, but + * for varlena types it is more useful. Note that this is the average + * width of the data as actually stored, post-TOASTing (eg, for a + * moved-out-of-line value, only the size of the pointer object is + * counted). This is the appropriate definition for the primary use of + * the statistic, which is to estimate sizes of in-memory hash tables of + * tuples. + */ + int4 stawidth; + + /* ---------------- + * stadistinct indicates the (approximate) number of distinct non-null + * data values in the column. The interpretation is: + * 0 unknown or not computed + * > 0 actual number of distinct values + * < 0 negative of multiplier for number of rows + * The special negative case allows us to cope with columns that are + * unique (stadistinct = -1) or nearly so (for example, a column in + * which values appear about twice on the average could be represented + * by stadistinct = -0.5). Because the number-of-rows statistic in + * pg_class may be updated more frequently than pg_statistic is, it's + * important to be able to describe such situations as a multiple of + * the number of rows, rather than a fixed number of distinct values. + * But in other cases a fixed number is correct (eg, a boolean column). + * ---------------- + */ + float4 stadistinct; + + /* ---------------- + * To allow keeping statistics on different kinds of datatypes, + * we do not hard-wire any particular meaning for the remaining + * statistical fields. Instead, we provide several "slots" in which + * statistical data can be placed. Each slot includes: + * kind integer code identifying kind of data + * op OID of associated operator, if needed + * numbers float4 array (for statistical values) + * values text array (for representations of data values) + * The ID and operator fields are never NULL; they are zeroes in an + * unused slot. The numbers and values fields are NULL in an unused + * slot, and might also be NULL in a used slot if the slot kind has + * no need for one or the other. + * ---------------- */ + int2 stakind1; + int2 stakind2; + int2 stakind3; + int2 stakind4; + + Oid staop1; + Oid staop2; + Oid staop3; + Oid staop4; + /* - * These fields contain the stats about the column indicated by the - * key + * THE REST OF THESE ARE VARIABLE LENGTH FIELDS, and may even be absent + * (NULL). They cannot be accessed as C struct entries; you have to use + * the full field access machinery (heap_getattr) for them. We declare + * them here for the catalog machinery. */ - float4 stanullfrac; /* the fraction of the entries that are - * NULL */ - float4 stacommonfrac; /* the fraction that are the most common - * val */ + + float4 stanumbers1[1]; + float4 stanumbers2[1]; + float4 stanumbers3[1]; + float4 stanumbers4[1]; /* - * THE REST OF THESE ARE VARIABLE LENGTH FIELDS. They cannot be - * accessed as C struct entries; you have to use the full field access - * machinery (heap_getattr) for them. - * - * All three of these are text representations of data values of the - * column's data type. To re-create the actual Datum, do - * datatypein(textout(givenvalue)). + * Values in these text arrays are external representations of values + * of the column's data type. To re-create the actual Datum, do + * datatypein(textout(arrayelement)). */ - text stacommonval; /* most common non-null value in column */ - text staloval; /* smallest non-null value in column */ - text stahival; /* largest non-null value in column */ + text stavalues1[1]; + text stavalues2[1]; + text stavalues3[1]; + text stavalues4[1]; } FormData_pg_statistic; +#define STATISTIC_NUM_SLOTS 4 + /* ---------------- * Form_pg_statistic corresponds to a pointer to a tuple with * the format of pg_statistic relation. @@ -81,14 +132,78 @@ typedef FormData_pg_statistic *Form_pg_statistic; * compiler constants for pg_statistic * ---------------- */ -#define Natts_pg_statistic 8 +#define Natts_pg_statistic 21 #define Anum_pg_statistic_starelid 1 #define Anum_pg_statistic_staattnum 2 -#define Anum_pg_statistic_staop 3 -#define Anum_pg_statistic_stanullfrac 4 -#define Anum_pg_statistic_stacommonfrac 5 -#define Anum_pg_statistic_stacommonval 6 -#define Anum_pg_statistic_staloval 7 -#define Anum_pg_statistic_stahival 8 +#define Anum_pg_statistic_stanullfrac 3 +#define Anum_pg_statistic_stawidth 4 +#define Anum_pg_statistic_stadistinct 5 +#define Anum_pg_statistic_stakind1 6 +#define Anum_pg_statistic_stakind2 7 +#define Anum_pg_statistic_stakind3 8 +#define Anum_pg_statistic_stakind4 9 +#define Anum_pg_statistic_staop1 10 +#define Anum_pg_statistic_staop2 11 +#define Anum_pg_statistic_staop3 12 +#define Anum_pg_statistic_staop4 13 +#define Anum_pg_statistic_stanumbers1 14 +#define Anum_pg_statistic_stanumbers2 15 +#define Anum_pg_statistic_stanumbers3 16 +#define Anum_pg_statistic_stanumbers4 17 +#define Anum_pg_statistic_stavalues1 18 +#define Anum_pg_statistic_stavalues2 19 +#define Anum_pg_statistic_stavalues3 20 +#define Anum_pg_statistic_stavalues4 21 + +/* + * Currently, three statistical slot "kinds" are defined: most common values, + * histogram, and correlation. Additional "kinds" will probably appear in + * future to help cope with non-scalar datatypes. + * + * Code reading the pg_statistic relation should not assume that a particular + * data "kind" will appear in any particular slot. Instead, search the + * stakind fields to see if the desired data is available. + */ + +/* + * In a "most common values" slot, staop is the OID of the "=" operator + * used to decide whether values are the same or not. stavalues contains + * the K most common non-null values appearing in the column, and stanumbers + * contains their frequencies (fractions of total row count). The values + * shall be ordered in decreasing frequency. Note that since the arrays are + * variable-size, K may be chosen by the statistics collector. Values should + * not appear in MCV unless they have been observed to occur more than once; + * a unique column will have no MCV slot. + */ +#define STATISTIC_KIND_MCV 1 + +/* + * A "histogram" slot describes the distribution of scalar data. staop is + * the OID of the "<" operator that describes the sort ordering. (In theory, + * more than one histogram could appear, if a datatype has more than one + * useful sort operator.) stavalues contains M (>=2) non-null values that + * divide the non-null column data values into M-1 bins of approximately equal + * population. The first stavalues item is the MIN and the last is the MAX. + * stanumbers is not used and should be NULL. IMPORTANT POINT: if an MCV + * slot is also provided, then the histogram describes the data distribution + * *after removing the values listed in MCV* (thus, it's a "compressed + * histogram" in the technical parlance). This allows a more accurate + * representation of the distribution of a column with some very-common + * values. In a column with only a few distinct values, it's possible that + * the MCV list describes the entire data population; in this case the + * histogram reduces to empty and should be omitted. + */ +#define STATISTIC_KIND_HISTOGRAM 2 + +/* + * A "correlation" slot describes the correlation between the physical order + * of table tuples and the ordering of data values of this column, as seen + * by the "<" operator identified by staop. (As with the histogram, more + * than one entry could theoretically appear.) stavalues is not used and + * should be NULL. stanumbers contains a single entry, the correlation + * coefficient between the sequence of data values and the sequence of + * their actual tuple positions. The coefficient ranges from +1 to -1. + */ +#define STATISTIC_KIND_CORRELATION 3 #endif /* PG_STATISTIC_H */ diff --git a/src/include/commands/command.h b/src/include/commands/command.h index 8b108451d2a..7eb1a4fab84 100644 --- a/src/include/commands/command.h +++ b/src/include/commands/command.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: command.h,v 1.26 2001/03/22 04:00:41 momjian Exp $ + * $Id: command.h,v 1.27 2001/05/07 00:43:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -43,9 +43,13 @@ extern void PortalCleanup(Portal portal); extern void AlterTableAddColumn(const char *relationName, bool inh, ColumnDef *colDef); -extern void AlterTableAlterColumn(const char *relationName, - bool inh, const char *colName, - Node *newDefault); +extern void AlterTableAlterColumnDefault(const char *relationName, + bool inh, const char *colName, + Node *newDefault); + +extern void AlterTableAlterColumnStatistics(const char *relationName, + bool inh, const char *colName, + Node *statsTarget); extern void AlterTableDropColumn(const char *relationName, bool inh, const char *colName, diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index d82d22fcdfc..87bb0007aa0 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -1,129 +1,27 @@ /*------------------------------------------------------------------------- * * vacuum.h - * header file for postgres vacuum cleaner + * header file for postgres vacuum cleaner and statistics analyzer * * * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: vacuum.h,v 1.34 2001/03/22 04:00:43 momjian Exp $ + * $Id: vacuum.h,v 1.35 2001/05/07 00:43:25 tgl Exp $ * *------------------------------------------------------------------------- */ #ifndef VACUUM_H #define VACUUM_H -#include "catalog/pg_attribute.h" -#include "catalog/pg_index.h" -#include "fmgr.h" -#include "nodes/pg_list.h" -#include "storage/itemptr.h" +#include "nodes/parsenodes.h" -typedef struct VAttListData -{ - int val_dummy; - struct VAttListData *val_next; -} VAttListData; - -typedef VAttListData *VAttList; - -typedef struct VacPageData -{ - BlockNumber blkno; /* BlockNumber of this Page */ - Size free; /* FreeSpace on this Page */ - uint16 offsets_used; /* Number of OffNums used by vacuum */ - uint16 offsets_free; /* Number of OffNums free or to be free */ - OffsetNumber offsets[1]; /* Array of its OffNums */ -} VacPageData; - -typedef VacPageData *VacPage; - -typedef struct VacPageListData -{ - int empty_end_pages;/* Number of "empty" end-pages */ - int num_pages; /* Number of pages in pagedesc */ - int num_allocated_pages; /* Number of allocated pages in - * pagedesc */ - VacPage *pagedesc; /* Descriptions of pages */ -} VacPageListData; - -typedef VacPageListData *VacPageList; - -typedef struct -{ - Form_pg_attribute attr; - Datum best, - guess1, - guess2, - max, - min; - int best_len, - guess1_len, - guess2_len, - max_len, - min_len; - long best_cnt, - guess1_cnt, - guess1_hits, - guess2_hits, - null_cnt, - nonnull_cnt, - max_cnt, - min_cnt; - FmgrInfo f_cmpeq, - f_cmplt, - f_cmpgt; - Oid op_cmplt; - regproc outfunc; - Oid typelem; - bool initialized; -} VacAttrStats; - -typedef struct VRelListData -{ - Oid vrl_relid; - struct VRelListData *vrl_next; -} VRelListData; - -typedef VRelListData *VRelList; - -typedef struct VTupleLinkData -{ - ItemPointerData new_tid; - ItemPointerData this_tid; -} VTupleLinkData; - -typedef VTupleLinkData *VTupleLink; - -typedef struct VTupleMoveData -{ - ItemPointerData tid; /* tuple ID */ - VacPage vacpage; /* where to move */ - bool cleanVpd; /* clean vacpage before using */ -} VTupleMoveData; - -typedef VTupleMoveData *VTupleMove; - -typedef struct VRelStats -{ - Oid relid; - int num_tuples; - int num_pages; - Size min_tlen; - Size max_tlen; - bool hasindex; - int num_vtlinks; - VTupleLink vtlinks; -} VRelStats; - -extern bool VacuumRunning; - -extern void vc_abort(void); -extern void vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols); -extern void analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL); - -#define ATTNVALS_SCALE 1000000000 /* XXX so it can act as a float4 */ +/* in commands/vacuum.c */ +extern void vacuum(VacuumStmt *vacstmt); +extern void vac_update_relstats(Oid relid, long num_pages, double num_tuples, + bool hasindex); +/* in commands/analyze.c */ +extern void analyze_rel(Oid relid, VacuumStmt *vacstmt); #endif /* VACUUM_H */ diff --git a/src/include/config.h.in b/src/include/config.h.in index 0d989dbbb31..01593a4ce96 100644 --- a/src/include/config.h.in +++ b/src/include/config.h.in @@ -8,7 +8,7 @@ * or in config.h afterwards. Of course, if you edit config.h, then your * changes will be overwritten the next time you run configure. * - * $Id: config.h.in,v 1.162 2001/04/14 22:55:02 petere Exp $ + * $Id: config.h.in,v 1.163 2001/05/07 00:43:25 tgl Exp $ */ #ifndef CONFIG_H @@ -157,6 +157,11 @@ #define FUNC_MAX_ARGS INDEX_MAX_KEYS /* + * System default value for pg_attribute.attstattarget + */ +#define DEFAULT_ATTSTATTARGET 10 + +/* * Define this to make libpgtcl's "pg_result -assign" command process C-style * backslash sequences in returned tuple data and convert Postgres array * attributes into Tcl lists. CAUTION: this conversion is *wrong* unless diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 2cf9378cf11..0967bef24ba 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: execnodes.h,v 1.57 2001/03/22 04:00:50 momjian Exp $ + * $Id: execnodes.h,v 1.58 2001/05/07 00:43:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -628,7 +628,6 @@ typedef struct GroupState * SortState information * * sort_Done indicates whether sort has been performed yet - * sort_Keys scan key structures describing the sort keys * tuplesortstate private state of tuplesort.c * ---------------- */ @@ -636,7 +635,6 @@ typedef struct SortState { CommonScanState csstate; /* its first field is NodeTag */ bool sort_Done; - ScanKey sort_Keys; void *tuplesortstate; } SortState; diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 1614d787bcb..63b1b1046a8 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: parsenodes.h,v 1.126 2001/03/23 04:49:56 momjian Exp $ + * $Id: parsenodes.h,v 1.127 2001/05/07 00:43:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -118,11 +118,12 @@ typedef struct AlterTableStmt NodeTag type; char subtype; /*------------ * A = add column - * T = alter column + * T = alter column default + * S = alter column statistics * D = drop column * C = add constraint * X = drop constraint - * E = add toast table, + * E = create toast table * U = change owner *------------ */ @@ -690,16 +691,20 @@ typedef struct ClusterStmt } ClusterStmt; /* ---------------------- - * Vacuum Statement + * Vacuum and Analyze Statements + * + * Even though these are nominally two statements, it's convenient to use + * just one node type for both. * ---------------------- */ typedef struct VacuumStmt { NodeTag type; - bool verbose; /* print status info */ - bool analyze; /* analyze data */ - char *vacrel; /* table to vacuum */ - List *va_spec; /* columns to analyse */ + bool vacuum; /* do VACUUM step */ + bool analyze; /* do ANALYZE step */ + bool verbose; /* print progress info */ + char *vacrel; /* name of single table to process, or NULL */ + List *va_cols; /* list of column names, or NIL for all */ } VacuumStmt; /* ---------------------- diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 3ae8e09f57a..9e69ed60992 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -10,7 +10,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: primnodes.h,v 1.53 2001/03/22 04:00:52 momjian Exp $ + * $Id: primnodes.h,v 1.54 2001/05/07 00:43:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -45,8 +45,8 @@ typedef struct FunctionCache *FunctionCachePtr; * reskey and reskeyop are the execution-time representation of sorting. * reskey must be zero in any non-sort-key item. The reskey of sort key * targetlist items for a sort plan node is 1,2,...,n for the n sort keys. - * The reskeyop of each such targetlist item is the sort operator's - * regproc OID. reskeyop will be zero in non-sort-key items. + * The reskeyop of each such targetlist item is the sort operator's OID. + * reskeyop will be zero in non-sort-key items. * * Both reskey and reskeyop are typically zero during parse/plan stages. * The executor does not pay any attention to ressortgroupref. @@ -62,7 +62,7 @@ typedef struct Resdom Index ressortgroupref; /* nonzero if referenced by a sort/group clause */ Index reskey; /* order of key in a sort (for those > 0) */ - Oid reskeyop; /* sort operator's regproc Oid */ + Oid reskeyop; /* sort operator's Oid */ bool resjunk; /* set to true to eliminate the attribute * from final target list */ } Resdom; diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index f643ef87968..c76d9b4af71 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: relation.h,v 1.54 2001/03/22 04:00:53 momjian Exp $ + * $Id: relation.h,v 1.55 2001/05/07 00:43:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -512,8 +512,8 @@ typedef struct RestrictInfo Oid hashjoinoperator; /* copy of clause operator */ /* cache space for hashclause processing; -1 if not yet set */ - Selectivity left_dispersion;/* dispersion of left side */ - Selectivity right_dispersion; /* dispersion of right side */ + Selectivity left_bucketsize; /* avg bucketsize of left side */ + Selectivity right_bucketsize; /* avg bucketsize of right side */ } RestrictInfo; /* diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 5caa576f0c0..cbf6df063a3 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: cost.h,v 1.38 2001/02/16 00:03:05 tgl Exp $ + * $Id: cost.h,v 1.39 2001/05/07 00:43:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -64,7 +64,8 @@ extern void cost_mergejoin(Path *path, Path *outer_path, Path *inner_path, List *restrictlist, List *outersortkeys, List *innersortkeys); extern void cost_hashjoin(Path *path, Path *outer_path, Path *inner_path, - List *restrictlist, Selectivity innerdispersion); + List *restrictlist, Selectivity innerbucketsize); +extern Selectivity estimate_hash_bucketsize(Query *root, Var *var); extern Cost cost_qual_eval(List *quals); extern void set_baserel_size_estimates(Query *root, RelOptInfo *rel); extern void set_joinrel_size_estimates(Query *root, RelOptInfo *rel, diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 5b71eded86f..0839feb4b2f 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: pathnode.h,v 1.35 2001/03/22 04:00:54 momjian Exp $ + * $Id: pathnode.h,v 1.36 2001/05/07 00:43:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -59,7 +59,7 @@ extern HashPath *create_hashjoin_path(RelOptInfo *joinrel, Path *inner_path, List *restrict_clauses, List *hashclauses, - Selectivity innerdispersion); + Selectivity innerbucketsize); /* * prototypes for relnode.c diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index f1c4aff1c80..6b35deed286 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: lsyscache.h,v 1.30 2001/03/22 04:01:13 momjian Exp $ + * $Id: lsyscache.h,v 1.31 2001/05/07 00:43:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -21,8 +21,6 @@ extern AttrNumber get_attnum(Oid relid, char *attname); extern Oid get_atttype(Oid relid, AttrNumber attnum); extern bool get_attisset(Oid relid, char *attname); extern int32 get_atttypmod(Oid relid, AttrNumber attnum); -extern double get_attdispersion(Oid relid, AttrNumber attnum, - double min_estimate); extern RegProcedure get_opcode(Oid opno); extern char *get_opname(Oid opno); extern bool op_mergejoinable(Oid opno, Oid ltype, Oid rtype, @@ -41,6 +39,14 @@ extern bool get_typbyval(Oid typid); extern void get_typlenbyval(Oid typid, int16 *typlen, bool *typbyval); extern char get_typstorage(Oid typid); extern Datum get_typdefault(Oid typid); +extern bool get_attstatsslot(HeapTuple statstuple, + Oid atttype, int32 atttypmod, + int reqkind, Oid reqop, + Datum **values, int *nvalues, + float4 **numbers, int *nnumbers); +extern void free_attstatsslot(Oid atttype, + Datum *values, int nvalues, + float4 *numbers, int nnumbers); #define TypeIsToastable(typid) (get_typstorage(typid) != 'p') diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h index 8d4e2ae62c4..342f7bf8a56 100644 --- a/src/include/utils/syscache.h +++ b/src/include/utils/syscache.h @@ -9,7 +9,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: syscache.h,v 1.29 2001/03/22 04:01:14 momjian Exp $ + * $Id: syscache.h,v 1.30 2001/05/07 00:43:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -53,7 +53,7 @@ #define RULEOID 22 #define SHADOWNAME 23 #define SHADOWSYSID 24 -#define STATRELID 25 +#define STATRELATT 25 #define TYPENAME 26 #define TYPEOID 27 diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h index 7f273776c36..001761796e2 100644 --- a/src/include/utils/tuplesort.h +++ b/src/include/utils/tuplesort.h @@ -13,7 +13,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: tuplesort.h,v 1.6 2001/01/24 19:43:29 momjian Exp $ + * $Id: tuplesort.h,v 1.7 2001/05/07 00:43:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -36,8 +36,9 @@ typedef struct Tuplesortstate Tuplesortstate; */ extern Tuplesortstate *tuplesort_begin_heap(TupleDesc tupDesc, - int nkeys, ScanKey keys, - bool randomAccess); + int nkeys, + Oid *sortOperators, AttrNumber *attNums, + bool randomAccess); extern Tuplesortstate *tuplesort_begin_index(Relation indexRel, bool enforceUnique, bool randomAccess); @@ -75,4 +76,19 @@ extern void tuplesort_rescan(Tuplesortstate *state); extern void tuplesort_markpos(Tuplesortstate *state); extern void tuplesort_restorepos(Tuplesortstate *state); +/* + * This routine selects an appropriate sorting function to implement + * a sort operator as efficiently as possible. + */ +typedef enum +{ + SORTFUNC_LT, /* raw "<" operator */ + SORTFUNC_CMP, /* -1 / 0 / 1 three-way comparator */ + SORTFUNC_REVCMP /* 1 / 0 / -1 (reversed) 3-way comparator */ +} SortFunctionKind; + +extern void SelectSortFunction(Oid sortOperator, + RegProcedure *sortFunction, + SortFunctionKind *kind); + #endif /* TUPLESORT_H */ diff --git a/src/interfaces/ecpg/preproc/keywords.c b/src/interfaces/ecpg/preproc/keywords.c index 5614a34b0fe..c03880f497d 100644 --- a/src/interfaces/ecpg/preproc/keywords.c +++ b/src/interfaces/ecpg/preproc/keywords.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/keywords.c,v 1.39 2001/03/22 04:01:21 momjian Exp $ + * $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/keywords.c,v 1.40 2001/05/07 00:43:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -238,6 +238,7 @@ static ScanKeyword ScanKeywords[] = { {"some", SOME}, {"start", START}, {"statement", STATEMENT}, + {"statistics", STATISTICS}, {"stdin", STDIN}, {"stdout", STDOUT}, {"substring", SUBSTRING}, diff --git a/src/interfaces/ecpg/preproc/preproc.y b/src/interfaces/ecpg/preproc/preproc.y index 345efb6576e..91708bd91fa 100644 --- a/src/interfaces/ecpg/preproc/preproc.y +++ b/src/interfaces/ecpg/preproc/preproc.y @@ -134,7 +134,7 @@ make_name(void) %union { double dval; - int ival; + int ival; char * str; struct when action; struct index index; @@ -224,7 +224,7 @@ make_name(void) NONE, NOTHING, NOTIFY, NOTNULL, OFFSET, OIDS, OPERATOR, OWNER, PASSWORD, PROCEDURAL, REINDEX, RENAME, RESET, RETURNS, ROW, RULE, SEQUENCE, SERIAL, SETOF, SHARE, - SHOW, START, STATEMENT, STDIN, STDOUT, SYSID TEMP, + SHOW, START, STATEMENT, STATISTICS, STDIN, STDOUT, SYSID TEMP, TEMPLATE, TOAST, TRUNCATE, TRUSTED, UNLISTEN, UNTIL, VACUUM, VALID, VERBOSE, VERSION @@ -285,7 +285,7 @@ make_name(void) %type <str> file_name AexprConst ParamNo c_expr ConstTypename %type <str> in_expr_nodes a_expr b_expr TruncateStmt CommentStmt %type <str> opt_indirection expr_list extract_list extract_arg -%type <str> position_list substr_list substr_from alter_column_action +%type <str> position_list substr_list substr_from alter_column_default %type <str> trim_list in_expr substr_for attr attrs drop_behavior %type <str> Typename SimpleTypename Generic Numeric generic opt_float opt_numeric %type <str> opt_decimal Character character opt_varying opt_charset @@ -293,7 +293,7 @@ make_name(void) %type <str> row_expr row_descriptor row_list ConstDatetime opt_chain %type <str> SelectStmt into_clause OptTemp ConstraintAttributeSpec %type <str> opt_table opt_all sort_clause sortby_list ConstraintAttr -%type <str> sortby OptUseOp opt_inh_star relation_name_list name_list +%type <str> sortby OptUseOp relation_name_list name_list %type <str> group_clause having_clause from_clause opt_distinct %type <str> join_outer where_clause relation_expr sub_type opt_arg %type <str> opt_column_list insert_rest InsertStmt OptimizableStmt @@ -301,8 +301,8 @@ make_name(void) %type <str> NotifyStmt columnElem copy_dirn UnlistenStmt copy_null %type <str> copy_delimiter ListenStmt CopyStmt copy_file_name opt_binary %type <str> opt_with_copy FetchStmt direction fetch_how_many from_in -%type <str> ClosePortalStmt DropStmt VacuumStmt opt_verbose func_arg -%type <str> opt_analyze opt_va_list va_list ExplainStmt index_params +%type <str> ClosePortalStmt DropStmt VacuumStmt AnalyzeStmt opt_verbose func_arg +%type <str> analyze_keyword opt_name_list ExplainStmt index_params %type <str> index_list func_index index_elem opt_class access_method_clause %type <str> index_opt_unique IndexStmt func_return ConstInterval %type <str> func_args_list func_args opt_with ProcedureStmt def_arg @@ -329,7 +329,7 @@ make_name(void) %type <str> opt_cursor opt_lmode ConstraintsSetStmt comment_tg AllConst %type <str> case_expr when_clause_list case_default case_arg when_clause %type <str> select_clause opt_select_limit select_limit_value ConstraintTimeSpec -%type <str> select_offset_value ReindexStmt join_type opt_only opt_boolean +%type <str> select_offset_value ReindexStmt join_type opt_boolean %type <str> join_qual update_list AlterSchemaStmt joined_table %type <str> opt_level opt_lock lock_type users_in_new_group_clause %type <str> OptConstrFromTable comment_op OptTempTableName StringConst @@ -447,6 +447,7 @@ stmt: AlterSchemaStmt { output_statement($1, 0, NULL, connection); } | CreatedbStmt { output_statement($1, 0, NULL, connection); } | DropdbStmt { output_statement($1, 0, NULL, connection); } | VacuumStmt { output_statement($1, 0, NULL, connection); } + | AnalyzeStmt { output_statement($1, 0, NULL, connection); } | VariableSetStmt { output_statement($1, 0, NULL, connection); } | VariableShowStmt { output_statement($1, 0, NULL, connection); } | VariableResetStmt { output_statement($1, 0, NULL, connection); } @@ -909,39 +910,40 @@ CheckPointStmt: CHECKPOINT { $$= make_str("checkpoint"); } /***************************************************************************** * - * QUERY : - * * ALTER TABLE variations * *****************************************************************************/ AlterTableStmt: -/* ALTER TABLE <name> ADD [COLUMN] <coldef> */ - ALTER TABLE relation_name opt_inh_star ADD opt_column columnDef +/* ALTER TABLE <relation> ADD [COLUMN] <coldef> */ + ALTER TABLE relation_expr ADD opt_column columnDef + { + $$ = cat_str(5, make_str("alter table"), $3, make_str("add"), $5, $6); + } +/* ALTER TABLE <relation> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP DEFAULT} */ + | ALTER TABLE relation_expr ALTER opt_column ColId alter_column_default { - $$ = cat_str(6, make_str("alter table"), $3, $4, make_str("add"), $6, $7); + $$ = cat_str(6, make_str("alter table"), $3, make_str("alter"), $5, $6, $7); } -/* ALTER TABLE <name> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP -DEFAULT} */ - | ALTER TABLE relation_name opt_inh_star ALTER opt_column ColId - alter_column_action +/* ALTER TABLE <relation> ALTER [COLUMN] <colname> SET STATISTICS <Iconst> */ + | ALTER TABLE relation_expr ALTER opt_column ColId SET STATISTICS Iconst { - $$ = cat_str(7, make_str("alter table"), $3, $4, make_str("alter"), $6, $7, $8); + $$ = cat_str(7, make_str("alter table"), $3, make_str("alter"), $5, $6, make_str("set statistics"), $9); } -/* ALTER TABLE <name> DROP [COLUMN] <name> {RESTRICT|CASCADE} */ - | ALTER TABLE relation_name opt_inh_star DROP opt_column ColId drop_behavior +/* ALTER TABLE <relation> DROP [COLUMN] <colname> {RESTRICT|CASCADE} */ + | ALTER TABLE relation_expr DROP opt_column ColId drop_behavior { - $$ = cat_str(7, make_str("alter table"), $3, $4, make_str("drop"), $6, $7, $8); + $$ = cat_str(6, make_str("alter table"), $3, make_str("drop"), $5, $6, $7); } -/* ALTER TABLE <name> ADD CONSTRAINT ... */ - | ALTER TABLE relation_name opt_inh_star ADD TableConstraint +/* ALTER TABLE <relation> ADD CONSTRAINT ... */ + | ALTER TABLE relation_expr ADD TableConstraint { - $$ = cat_str(5, make_str("alter table"), $3, $4, make_str("add"), $6); + $$ = cat_str(4, make_str("alter table"), $3, make_str("add"), $5); } -/* ALTER TABLE <name> DROP CONSTRAINT ... */ - | ALTER TABLE relation_name opt_inh_star DROP CONSTRAINT name drop_behavior +/* ALTER TABLE <relation> DROP CONSTRAINT ... */ + | ALTER TABLE relation_expr DROP CONSTRAINT name drop_behavior { - $$ = cat_str(6, make_str("alter table"), $3, $4, make_str("drop constraint"), $7, $8); + $$ = cat_str(5, make_str("alter table"), $3, make_str("drop constraint"), $6, $7); } /* ALTER TABLE <name> OWNER TO UserId */ | ALTER TABLE relation_name OWNER TO UserId @@ -950,7 +952,7 @@ DEFAULT} */ } ; -alter_column_action: +alter_column_default: SET DEFAULT a_expr { $$ = cat2_str(make_str("set default"), $3); } | DROP DEFAULT { $$ = make_str("drop default"); } ; @@ -1234,10 +1236,6 @@ key_reference: NO ACTION { $$ = make_str("no action"); } | SET NULL_P { $$ = make_str("set null"); } ; -opt_only: ONLY { $$ = make_str("only"); } - | /*EMPTY*/ { $$ = EMPTY; } - ; - OptInherit: INHERITS '(' relation_name_list ')' { $$ = cat_str(3, make_str("inherits ("), $3, make_str(")")); } | /*EMPTY*/ { $$ = EMPTY; } ; @@ -2013,10 +2011,9 @@ opt_force: FORCE { $$ = make_str("force"); } * *****************************************************************************/ -RenameStmt: ALTER TABLE relation_name opt_inh_star - RENAME opt_column opt_name TO name +RenameStmt: ALTER TABLE relation_expr RENAME opt_column opt_name TO name { - $$ = cat_str(8, make_str("alter table"), $3, $4, make_str("rename"), $6, $7, make_str("to"), $9); + $$ = cat_str(7, make_str("alter table"), $3, make_str("rename"), $5, $6, make_str("to"), $8); } ; @@ -2250,38 +2247,44 @@ ClusterStmt: CLUSTER index_name ON relation_name * * QUERY: * vacuum + * analyze * *****************************************************************************/ -VacuumStmt: VACUUM opt_verbose opt_analyze +VacuumStmt: VACUUM opt_verbose + { + $$ = cat_str(2, make_str("vacuum"), $2); + } + | VACUUM opt_verbose relation_name { $$ = cat_str(3, make_str("vacuum"), $2, $3); } - | VACUUM opt_verbose opt_analyze relation_name opt_va_list + | VACUUM opt_verbose AnalyzeStmt { - if ( strlen($5) > 0 && strlen($4) == 0 ) - mmerror(ET_ERROR, "VACUUM syntax error at or near \"(\"\n\tRelations name must be specified"); - $$ = cat_str(5, make_str("vacuum"), $2, $3, $4, $5); + $$ = cat_str(3, make_str("vacuum"), $2, $3); } ; -opt_verbose: VERBOSE { $$ = make_str("verbose"); } - | /*EMPTY*/ { $$ = EMPTY; } +AnalyzeStmt: analyze_keyword opt_verbose + { + $$ = cat_str(2, $1, $2); + } + | analyze_keyword opt_verbose relation_name opt_name_list + { + $$ = cat_str(4, $1, $2, $3, $4); + } ; -opt_analyze: ANALYZE { $$ = make_str("analyze"); } - | ANALYSE { $$ = make_str("analyse"); } - | /*EMPTY*/ { $$ = EMPTY; } +analyze_keyword: ANALYZE { $$ = make_str("analyze"); } + | ANALYSE { $$ = make_str("analyse"); } ; -opt_va_list: '(' va_list ')' { $$ = cat_str(3, make_str("("), $2, make_str(")")); } +opt_verbose: VERBOSE { $$ = make_str("verbose"); } | /*EMPTY*/ { $$ = EMPTY; } ; -va_list: name - { $$=$1; } - | va_list ',' name - { $$=cat_str(3, $1, make_str(","), $3); } +opt_name_list: '(' name_list ')' { $$ = cat_str(3, make_str("("), $2, make_str(")")); } + | /*EMPTY*/ { $$ = EMPTY; } ; @@ -2383,9 +2386,9 @@ columnElem: ColId opt_indirection * *****************************************************************************/ -DeleteStmt: DELETE FROM opt_only name where_clause +DeleteStmt: DELETE FROM relation_expr where_clause { - $$ = cat_str(4, make_str("delete from"), $3, $4, $5); + $$ = cat_str(3, make_str("delete from"), $3, $4); } ; @@ -2416,12 +2419,12 @@ opt_lmode: SHARE { $$ = make_str("share"); } * *****************************************************************************/ -UpdateStmt: UPDATE opt_only relation_name +UpdateStmt: UPDATE relation_expr SET update_target_list from_clause where_clause { - $$ = cat_str(7, make_str("update"), $2, $3, make_str("set"), $5, $6, $7); + $$ = cat_str(6, make_str("update"), $2, make_str("set"), $4, $5, $6); } ; @@ -2667,10 +2670,6 @@ select_offset_value: PosIntConst { * ...however, recursive addattr and rename supported. make special * cases for these. */ -opt_inh_star: '*' { $$ = make_str("*"); } - | /*EMPTY*/ { $$ = EMPTY; } - ; - relation_name_list: name_list { $$ = $1; }; name_list: name @@ -2704,7 +2703,7 @@ opt_for_update_clause: for_update_clause { $$ = $1; } | /* EMPTY */ { $$ = EMPTY; } ; -update_list: OF va_list +update_list: OF name_list { $$ = cat2_str(make_str("of"), $2); } @@ -5028,6 +5027,7 @@ TokenId: ABSOLUTE { $$ = make_str("absolute"); } | SHARE { $$ = make_str("share"); } | START { $$ = make_str("start"); } | STATEMENT { $$ = make_str("statement"); } + | STATISTICS { $$ = make_str("statistics"); } | STDIN { $$ = make_str("stdin"); } | STDOUT { $$ = make_str("stdout"); } | SYSID { $$ = make_str("sysid"); } diff --git a/src/test/regress/expected/oidjoins.out b/src/test/regress/expected/oidjoins.out index a2b0ad9e3e7..46bc60f6955 100644 --- a/src/test/regress/expected/oidjoins.out +++ b/src/test/regress/expected/oidjoins.out @@ -353,12 +353,28 @@ WHERE pg_statistic.starelid != 0 AND -----+---------- (0 rows) -SELECT oid, pg_statistic.staop +SELECT oid, pg_statistic.staop1 FROM pg_statistic -WHERE pg_statistic.staop != 0 AND - NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop); - oid | staop ------+------- +WHERE pg_statistic.staop1 != 0 AND + NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop1); + oid | staop1 +-----+-------- +(0 rows) + +SELECT oid, pg_statistic.staop2 +FROM pg_statistic +WHERE pg_statistic.staop2 != 0 AND + NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop2); + oid | staop2 +-----+-------- +(0 rows) + +SELECT oid, pg_statistic.staop3 +FROM pg_statistic +WHERE pg_statistic.staop3 != 0 AND + NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop3); + oid | staop3 +-----+-------- (0 rows) SELECT oid, pg_trigger.tgrelid diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 9d4ff1b3985..1b094a6e3bf 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -482,8 +482,8 @@ WHERE p1.aggtransfn = p2.oid AND (p2.pronargs = 1 AND p1.aggbasetype = 0))); oid | aggname | oid | proname -------+---------+-----+------------- - 16997 | max | 768 | int4larger - 17011 | min | 769 | int4smaller + 17010 | max | 768 | int4larger + 17024 | min | 769 | int4smaller (2 rows) -- Cross-check finalfn (if present) against its entry in pg_proc. diff --git a/src/test/regress/sql/oidjoins.sql b/src/test/regress/sql/oidjoins.sql index b7ea1f63eaa..88727a6c76e 100644 --- a/src/test/regress/sql/oidjoins.sql +++ b/src/test/regress/sql/oidjoins.sql @@ -177,10 +177,18 @@ SELECT oid, pg_statistic.starelid FROM pg_statistic WHERE pg_statistic.starelid != 0 AND NOT EXISTS(SELECT * FROM pg_class AS t1 WHERE t1.oid = pg_statistic.starelid); -SELECT oid, pg_statistic.staop +SELECT oid, pg_statistic.staop1 FROM pg_statistic -WHERE pg_statistic.staop != 0 AND - NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop); +WHERE pg_statistic.staop1 != 0 AND + NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop1); +SELECT oid, pg_statistic.staop2 +FROM pg_statistic +WHERE pg_statistic.staop2 != 0 AND + NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop2); +SELECT oid, pg_statistic.staop3 +FROM pg_statistic +WHERE pg_statistic.staop3 != 0 AND + NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop3); SELECT oid, pg_trigger.tgrelid FROM pg_trigger WHERE pg_trigger.tgrelid != 0 AND |