aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/selfuncs.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt/selfuncs.c')
-rw-r--r--src/backend/utils/adt/selfuncs.c117
1 files changed, 116 insertions, 1 deletions
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index e0cfeefaee4..406916c8873 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.95 2001/07/16 05:06:59 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.96 2001/08/13 18:45:35 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -134,8 +134,16 @@ static void convert_string_to_scalar(unsigned char *value,
double *scaledlobound,
unsigned char *hibound,
double *scaledhibound);
+static void convert_bytea_to_scalar(Datum value,
+ double *scaledvalue,
+ Datum lobound,
+ double *scaledlobound,
+ Datum hibound,
+ double *scaledhibound);
static double convert_one_string_to_scalar(unsigned char *value,
int rangelo, int rangehi);
+static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
+ int rangelo, int rangehi);
static unsigned char *convert_string_datum(Datum value, Oid typid);
static double convert_timevalue_to_scalar(Datum value, Oid typid);
static double get_att_numdistinct(Query *root, Var *var,
@@ -1664,6 +1672,9 @@ icnlikejoinsel(PG_FUNCTION_ARGS)
* which is explained below. The reason why this routine deals with
* three values at a time, not just one, is that we need it for strings.
*
+ * The bytea datatype is handled separately from strings: its values are
+ * not null-terminated, so explicit lengths are passed around, and a fixed
+ * byte range of 0..255 is assumed.
+ *
* The several datatypes representing absolute times are all converted
* to Timestamp, which is actually a double, and then we just use that
* double value. Note this will give bad results for the various "special"
@@ -1719,6 +1730,17 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
}
/*
+ * Built-in bytea type
+ */
+ case BYTEAOID:
+ {
+ convert_bytea_to_scalar(value, scaledvalue,
+ lobound, scaledlobound,
+ hibound, scaledhibound);
+ return true;
+ }
+
+ /*
* Built-in time types
*/
case TIMESTAMPOID:
@@ -1997,6 +2019,99 @@ convert_string_datum(Datum value, Oid typid)
}
/*
+ * Do convert_to_scalar()'s work for any bytea data type.
+ *
+ * Very similar to convert_string_to_scalar except we can't assume
+ * null-termination and therefore pass explicit lengths around.
+ *
+ * Also, assumptions about likely "normal" ranges of characters have been
+ * removed - a data range of 0..255 is always used, for now. (Perhaps
+ * someday we will add information about actual byte data range to
+ * pg_statistic.)
+ *
+ * Inputs: value, lobound and hibound are the three bytea datums to be
+ * converted. The data length of each is computed as VARSIZE() minus
+ * VARHDRSZ, and the data itself is read through VARDATA().
+ *
+ * Outputs: the converted numbers are stored into *scaledvalue,
+ * *scaledlobound and *scaledhibound respectively; nothing is returned.
+ *
+ * Any leading bytes that are identical in all three inputs are skipped
+ * before conversion. Since convert_one_bytea_to_scalar() looks at no more
+ * than the first 10 bytes it is given, skipping the shared prefix means
+ * those 10 bytes start at the first position where the inputs differ.
+ *
+ * NOTE(review): the datums are dereferenced directly with VARSIZE and
+ * VARDATA; presumably callers always pass plain in-memory (already
+ * detoasted) values - confirm against the callers.
+ */
+static void
+convert_bytea_to_scalar(Datum value,
+ double *scaledvalue,
+ Datum lobound,
+ double *scaledlobound,
+ Datum hibound,
+ double *scaledhibound)
+{
+ int rangelo, /* lowest byte value assumed to occur */
+ rangehi, /* highest byte value assumed to occur */
+ valuelen = VARSIZE(DatumGetPointer(value)) - VARHDRSZ, /* data bytes left in value */
+ loboundlen = VARSIZE(DatumGetPointer(lobound)) - VARHDRSZ, /* data bytes left in lobound */
+ hiboundlen = VARSIZE(DatumGetPointer(hibound)) - VARHDRSZ, /* data bytes left in hibound */
+ i, /* counts prefix positions examined */
+ minlen; /* length of the shortest of the three inputs */
+ unsigned char *valstr = (unsigned char *) VARDATA(DatumGetPointer(value)), /* cursor into value's data */
+ *lostr = (unsigned char *) VARDATA(DatumGetPointer(lobound)), /* cursor into lobound's data */
+ *histr = (unsigned char *) VARDATA(DatumGetPointer(hibound)); /* cursor into hibound's data */
+
+ /*
+ * Assume bytea data is uniformly distributed across all byte values.
+ *
+ * With this range the conversion routine works in base 256, that is
+ * rangehi - rangelo + 1.
+ */
+ rangelo = 0;
+ rangehi = 255;
+
+ /*
+ * Now strip any common prefix of the three strings.
+ *
+ * The scan is bounded by the shortest input and stops at the first
+ * position where the three bytes are not all equal. Each data pointer
+ * and its remaining length are advanced together, so afterwards they
+ * describe only the part that follows the shared prefix.
+ */
+ minlen = Min(Min(valuelen, loboundlen), hiboundlen);
+ for (i = 0; i < minlen; i++)
+ {
+ if (*lostr != *histr || *lostr != *valstr)
+ break;
+ lostr++, histr++, valstr++;
+ loboundlen--, hiboundlen--, valuelen--;
+ }
+
+ /*
+ * Now we can do the conversions.
+ *
+ * All three remainders are converted with the same byte range. A
+ * remainder of length zero (an input that consisted entirely of the
+ * shared prefix) converts to 0.0.
+ */
+ *scaledvalue = convert_one_bytea_to_scalar(valstr, valuelen, rangelo, rangehi);
+ *scaledlobound = convert_one_bytea_to_scalar(lostr, loboundlen, rangelo, rangehi);
+ *scaledhibound = convert_one_bytea_to_scalar(histr, hiboundlen, rangelo, rangehi);
+}
+/*
+ * Convert one byte string, given as a pointer plus an explicit length, to
+ * a double for convert_bytea_to_scalar().
+ *
+ * The bytes are read as successive digits of a fraction in base
+ * (rangehi - rangelo + 1): byte number k (counting from 1) contributes
+ * (byte - rangelo) / base^k to the result. At most the first 10 bytes
+ * are used; a zero or negative valuelen yields 0.0.
+ *
+ * A byte below rangelo is treated as rangelo - 1 and a byte above rangehi
+ * as rangehi + 1. With the range 0..255 passed by
+ * convert_bytea_to_scalar(), neither adjustment can trigger for an 8-bit
+ * unsigned char.
+ */
+static double
+convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
+ int rangelo, int rangehi)
+{
+ double num, /* fraction accumulated so far; the result */
+ denom, /* divisor for the current byte: base^k */
+ base; /* number of byte values in rangelo..rangehi */
+
+ if (valuelen <= 0)
+ return 0.0; /* empty string has scalar value 0 */
+
+ /*
+ * Since base is 256, need not consider more than about 10
+ * chars (even this many seems like overkill)
+ *
+ * Each further byte is divided by another factor of base, so bytes
+ * this far in change the sum only minutely.
+ */
+ if (valuelen > 10)
+ valuelen = 10;
+
+ /* Convert initial characters to fraction */
+ base = rangehi - rangelo + 1;
+ num = 0.0;
+ denom = base;
+ while (valuelen-- > 0)
+ {
+ int ch = *value++; /* current byte, widened so it can leave 0..255 */
+
+ if (ch < rangelo)
+ ch = rangelo - 1; /* clamp to one step below the range */
+ else if (ch > rangehi)
+ ch = rangehi + 1; /* clamp to one step above the range */
+ num += ((double) (ch - rangelo)) / denom;
+ denom *= base; /* the next byte is one digit position further right */
+ }
+
+ return num;
+}
+
+/*
* Do convert_to_scalar()'s work for any timevalue data type.
*/
static double