aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/statistics/mcv.c114
-rw-r--r--src/include/catalog/catversion.h2
2 files changed, 77 insertions, 39 deletions
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index ee581278328..e90a03fdf79 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -451,9 +451,9 @@ statext_mcv_load(Oid mvoid)
*
* The overall structure of the serialized representation looks like this:
*
- * +--------+----------------+---------------------+-------+
- * | header | dimension info | deduplicated values | items |
- * +--------+----------------+---------------------+-------+
+ * +---------------+----------------+---------------------+-------+
+ * | header fields | dimension info | deduplicated values | items |
+ * +---------------+----------------+---------------------+-------+
*
* Where dimension info stores information about type of K-th attribute (e.g.
* typlen, typbyval and length of deduplicated values). Deduplicated values
@@ -492,6 +492,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
/* serialized items (indexes into arrays, etc.) */
bytea *output;
+ char *raw;
char *ptr;
/* values per dimension (and number of non-NULL values) */
@@ -593,8 +594,12 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
info[dim].nbytes = 0;
for (i = 0; i < info[dim].nvalues; i++)
{
+ Size len;
+
values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i]));
- info[dim].nbytes += VARSIZE_ANY(values[dim][i]);
+
+ len = VARSIZE_ANY(values[dim][i]);
+ info[dim].nbytes += MAXALIGN(len);
}
}
else if (info[dim].typlen == -2) /* cstring */
@@ -602,9 +607,13 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
info[dim].nbytes = 0;
for (i = 0; i < info[dim].nvalues; i++)
{
+ Size len;
+
/* c-strings include terminator, so +1 byte */
values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i]));
- info[dim].nbytes += strlen(DatumGetCString(values[dim][i])) + 1;
+
+ len = strlen(DatumGetCString(values[dim][i])) + 1;
+ info[dim].nbytes += MAXALIGN(len);
}
}
@@ -617,20 +626,22 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
* whole serialized MCV list (varlena header, MCV header, dimension info
* for each attribute, deduplicated values and items).
*/
- total_length = VARHDRSZ + offsetof(MCVList, items)
- + (ndims * sizeof(DimensionInfo))
- + (mcvlist->nitems * itemsize);
+ total_length = offsetof(MCVList, items)
+ + MAXALIGN(ndims * sizeof(DimensionInfo));
/* add space for the arrays of deduplicated values */
for (i = 0; i < ndims; i++)
- total_length += info[i].nbytes;
+ total_length += MAXALIGN(info[i].nbytes);
- /* allocate space for the whole serialized MCV list */
- output = (bytea *) palloc(total_length);
- SET_VARSIZE(output, total_length);
+ /* and finally the items (no additional alignment needed) */
+ total_length += mcvlist->nitems * itemsize;
- /* 'ptr' points to the current position in the output buffer */
- ptr = VARDATA(output);
+ /*
+ * Allocate space for the whole serialized MCV list (we'll skip bytes,
+ * so we set them to zero to make the result more compressible).
+ */
+ raw = palloc0(total_length);
+ ptr = raw;
/* copy the MCV list header */
memcpy(ptr, mcvlist, offsetof(MCVList, items));
@@ -638,7 +649,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
/* store information about the attributes */
memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
- ptr += sizeof(DimensionInfo) * ndims;
+ ptr += MAXALIGN(sizeof(DimensionInfo) * ndims);
/* Copy the deduplicated values for all attributes to the output. */
for (dim = 0; dim < ndims; dim++)
@@ -670,6 +681,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
}
else if (info[dim].typlen > 0) /* pased by reference */
{
+ /* no special alignment needed, treated as char array */
memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
ptr += info[dim].typlen;
}
@@ -678,14 +690,14 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
int len = VARSIZE_ANY(value);
memcpy(ptr, DatumGetPointer(value), len);
- ptr += len;
+ ptr += MAXALIGN(len);
}
else if (info[dim].typlen == -2) /* cstring */
{
Size len = strlen(DatumGetCString(value)) + 1; /* terminator */
memcpy(ptr, DatumGetCString(value), len);
- ptr += len;
+ ptr += MAXALIGN(len);
}
/* no underflows or overflows */
@@ -694,6 +706,9 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
/* we should get exactly nbytes of data for this dimension */
Assert((ptr - start) == info[dim].nbytes);
+
+ /* make sure the pointer is aligned correctly after each dimension */
+ ptr = raw + MAXALIGN(ptr - raw);
}
/* Serialize the items, with uint16 indexes instead of the values. */
@@ -702,7 +717,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
MCVItem *mcvitem = &mcvlist->items[i];
/* don't write beyond the allocated space */
- Assert(ptr <= (char *) output + total_length - itemsize);
+ Assert(ptr <= raw + total_length - itemsize);
/* reset the item (we only allocate it once and reuse it) */
memset(item, 0, itemsize);
@@ -741,12 +756,19 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
}
/* at this point we expect to match the total_length exactly */
- Assert((ptr - (char *) output) == total_length);
+ Assert((ptr - raw) == total_length);
pfree(item);
pfree(values);
pfree(counts);
+ output = (bytea *) palloc(VARHDRSZ + total_length);
+ SET_VARSIZE(output, VARHDRSZ + total_length);
+
+ memcpy(VARDATA_ANY(output), raw, total_length);
+
+ pfree(raw);
+
return output;
}
@@ -764,6 +786,7 @@ statext_mcv_deserialize(bytea *data)
i;
Size expected_size;
MCVList *mcvlist;
+ char *raw;
char *ptr;
int ndims,
@@ -781,6 +804,7 @@ statext_mcv_deserialize(bytea *data)
Size datalen;
char *dataptr;
char *valuesptr;
+ char *isnullptr;
if (data == NULL)
return NULL;
@@ -797,7 +821,10 @@ statext_mcv_deserialize(bytea *data)
mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
/* initialize pointer to the data part (skip the varlena header) */
- ptr = VARDATA_ANY(data);
+ raw = palloc(VARSIZE_ANY_EXHDR(data));
+ ptr = raw;
+
+ memcpy(raw, VARDATA_ANY(data), VARSIZE_ANY_EXHDR(data));
/* get the header and perform further sanity checks */
memcpy(mcvlist, ptr, offsetof(MCVList, items));
@@ -848,7 +875,7 @@ statext_mcv_deserialize(bytea *data)
/* Now it's safe to access the dimension info. */
info = (DimensionInfo *) ptr;
- ptr += ndims * sizeof(DimensionInfo);
+ ptr += MAXALIGN(ndims * sizeof(DimensionInfo));
/* account for the value arrays */
for (dim = 0; dim < ndims; dim++)
@@ -860,7 +887,7 @@ statext_mcv_deserialize(bytea *data)
Assert(info[dim].nvalues >= 0);
Assert(info[dim].nbytes >= 0);
- expected_size += info[dim].nbytes;
+ expected_size += MAXALIGN(info[dim].nbytes);
}
/*
@@ -890,7 +917,7 @@ statext_mcv_deserialize(bytea *data)
/* space needed for a copy of data for by-ref types */
if (!info[dim].typbyval)
- datalen += info[dim].nbytes;
+ datalen += MAXALIGN(info[dim].nbytes);
}
/*
@@ -899,19 +926,25 @@ statext_mcv_deserialize(bytea *data)
* original data - it may disappear while we're still using the MCV list,
* e.g. due to catcache release. Only needed for by-ref types.
*/
- mcvlen = offsetof(MCVList, items) +
- +(sizeof(MCVItem) * nitems) /* array of MCVItem */
- + ((sizeof(Datum) + sizeof(bool)) * ndims * nitems) +
- +datalen; /* by-ref data */
+ mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
+
+ /* arrays of values and isnull flags for all MCV items */
+ mcvlen += MAXALIGN(sizeof(Datum) * ndims * nitems);
+ mcvlen += MAXALIGN(sizeof(bool) * ndims * nitems);
+ /* we don't quite need to align this, but it makes some assers easier */
+ mcvlen += MAXALIGN(datalen);
+
+ /* now resize the deserialized MCV list, and compute pointers to parts */
mcvlist = repalloc(mcvlist, mcvlen);
- /* pointer to the beginning of values/isnull space */
- valuesptr = (char *) mcvlist + offsetof(MCVList, items)
- + (sizeof(MCVItem) * nitems);
+ /* pointer to the beginning of values/isnull arrays */
+ valuesptr = (char *) mcvlist
+ + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
+
+ isnullptr = valuesptr + (MAXALIGN(sizeof(Datum) * ndims * nitems));
- /* get pointer where to store the data */
- dataptr = (char *) mcvlist + (mcvlen - datalen);
+ dataptr = isnullptr + (MAXALIGN(sizeof(bool) * ndims * nitems));
/*
* Build mapping (index => value) for translating the serialized data into
@@ -963,11 +996,11 @@ statext_mcv_deserialize(bytea *data)
Size len = VARSIZE_ANY(ptr);
memcpy(dataptr, ptr, len);
- ptr += len;
+ ptr += MAXALIGN(len);
/* just point into the array */
map[dim][i] = PointerGetDatum(dataptr);
- dataptr += len;
+ dataptr += MAXALIGN(len);
}
}
else if (info[dim].typlen == -2)
@@ -978,11 +1011,11 @@ statext_mcv_deserialize(bytea *data)
Size len = (strlen(ptr) + 1); /* don't forget the \0 */
memcpy(dataptr, ptr, len);
- ptr += len;
+ ptr += MAXALIGN(len);
/* just point into the array */
map[dim][i] = PointerGetDatum(dataptr);
- dataptr += len;
+ dataptr += MAXALIGN(len);
}
}
@@ -995,6 +1028,9 @@ statext_mcv_deserialize(bytea *data)
/* check we consumed input data for this dimension exactly */
Assert(ptr == (start + info[dim].nbytes));
+
+ /* ensure proper alignment of the data */
+ ptr = raw + MAXALIGN(ptr - raw);
}
/* we should have also filled the MCV list exactly */
@@ -1027,16 +1063,18 @@ statext_mcv_deserialize(bytea *data)
ptr += ITEM_SIZE(ndims);
/* check we're not overflowing the input */
- Assert(ptr <= (char *) data + VARSIZE_ANY(data));
+ Assert(ptr <= (char *) raw + VARSIZE_ANY_EXHDR(data));
}
/* check that we processed all the data */
- Assert(ptr == (char *) data + VARSIZE_ANY(data));
+ Assert(ptr == raw + VARSIZE_ANY_EXHDR(data));
/* release the buffers used for mapping */
for (dim = 0; dim < ndims; dim++)
pfree(map[dim]);
+
pfree(map);
+ pfree(raw);
return mcvlist;
}
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index f01087614e4..485cf422d96 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201903271
+#define CATALOG_VERSION_NO 201903291
#endif