diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/statistics/mcv.c | 114 | ||||
-rw-r--r-- | src/include/catalog/catversion.h | 2 |
2 files changed, 77 insertions, 39 deletions
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c index ee581278328..e90a03fdf79 100644 --- a/src/backend/statistics/mcv.c +++ b/src/backend/statistics/mcv.c @@ -451,9 +451,9 @@ statext_mcv_load(Oid mvoid) * * The overall structure of the serialized representation looks like this: * - * +--------+----------------+---------------------+-------+ - * | header | dimension info | deduplicated values | items | - * +--------+----------------+---------------------+-------+ + * +---------------+----------------+---------------------+-------+ + * | header fields | dimension info | deduplicated values | items | + * +---------------+----------------+---------------------+-------+ * * Where dimension info stores information about type of K-th attribute (e.g. * typlen, typbyval and length of deduplicated values). Deduplicated values @@ -492,6 +492,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) /* serialized items (indexes into arrays, etc.) */ bytea *output; + char *raw; char *ptr; /* values per dimension (and number of non-NULL values) */ @@ -593,8 +594,12 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) info[dim].nbytes = 0; for (i = 0; i < info[dim].nvalues; i++) { + Size len; + values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i])); - info[dim].nbytes += VARSIZE_ANY(values[dim][i]); + + len = VARSIZE_ANY(values[dim][i]); + info[dim].nbytes += MAXALIGN(len); } } else if (info[dim].typlen == -2) /* cstring */ @@ -602,9 +607,13 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) info[dim].nbytes = 0; for (i = 0; i < info[dim].nvalues; i++) { + Size len; + /* c-strings include terminator, so +1 byte */ values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i])); - info[dim].nbytes += strlen(DatumGetCString(values[dim][i])) + 1; + + len = strlen(DatumGetCString(values[dim][i])) + 1; + info[dim].nbytes += MAXALIGN(len); } } @@ -617,20 +626,22 @@ statext_mcv_serialize(MCVList * mcvlist, 
VacAttrStats **stats) * whole serialized MCV list (varlena header, MCV header, dimension info * for each attribute, deduplicated values and items). */ - total_length = VARHDRSZ + offsetof(MCVList, items) - + (ndims * sizeof(DimensionInfo)) - + (mcvlist->nitems * itemsize); + total_length = offsetof(MCVList, items) + + MAXALIGN(ndims * sizeof(DimensionInfo)); /* add space for the arrays of deduplicated values */ for (i = 0; i < ndims; i++) - total_length += info[i].nbytes; + total_length += MAXALIGN(info[i].nbytes); - /* allocate space for the whole serialized MCV list */ - output = (bytea *) palloc(total_length); - SET_VARSIZE(output, total_length); + /* and finally the items (no additional alignment needed) */ + total_length += mcvlist->nitems * itemsize; - /* 'ptr' points to the current position in the output buffer */ - ptr = VARDATA(output); + /* + * Allocate space for the whole serialized MCV list (we'll skip bytes, + * so we set them to zero to make the result more compressible). + */ + raw = palloc0(total_length); + ptr = raw; /* copy the MCV list header */ memcpy(ptr, mcvlist, offsetof(MCVList, items)); @@ -638,7 +649,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) /* store information about the attributes */ memcpy(ptr, info, sizeof(DimensionInfo) * ndims); - ptr += sizeof(DimensionInfo) * ndims; + ptr += MAXALIGN(sizeof(DimensionInfo) * ndims); /* Copy the deduplicated values for all attributes to the output. 
*/ for (dim = 0; dim < ndims; dim++) @@ -670,6 +681,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) } else if (info[dim].typlen > 0) /* pased by reference */ { + /* no special alignment needed, treated as char array */ memcpy(ptr, DatumGetPointer(value), info[dim].typlen); ptr += info[dim].typlen; } @@ -678,14 +690,14 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) int len = VARSIZE_ANY(value); memcpy(ptr, DatumGetPointer(value), len); - ptr += len; + ptr += MAXALIGN(len); } else if (info[dim].typlen == -2) /* cstring */ { Size len = strlen(DatumGetCString(value)) + 1; /* terminator */ memcpy(ptr, DatumGetCString(value), len); - ptr += len; + ptr += MAXALIGN(len); } /* no underflows or overflows */ @@ -694,6 +706,9 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) /* we should get exactly nbytes of data for this dimension */ Assert((ptr - start) == info[dim].nbytes); + + /* make sure the pointer is aligned correctly after each dimension */ + ptr = raw + MAXALIGN(ptr - raw); } /* Serialize the items, with uint16 indexes instead of the values. 
*/ @@ -702,7 +717,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) MCVItem *mcvitem = &mcvlist->items[i]; /* don't write beyond the allocated space */ - Assert(ptr <= (char *) output + total_length - itemsize); + Assert(ptr <= raw + total_length - itemsize); /* reset the item (we only allocate it once and reuse it) */ memset(item, 0, itemsize); @@ -741,12 +756,19 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) } /* at this point we expect to match the total_length exactly */ - Assert((ptr - (char *) output) == total_length); + Assert((ptr - raw) == total_length); pfree(item); pfree(values); pfree(counts); + output = (bytea *) palloc(VARHDRSZ + total_length); + SET_VARSIZE(output, VARHDRSZ + total_length); + + memcpy(VARDATA_ANY(output), raw, total_length); + + pfree(raw); + return output; } @@ -764,6 +786,7 @@ statext_mcv_deserialize(bytea *data) i; Size expected_size; MCVList *mcvlist; + char *raw; char *ptr; int ndims, @@ -781,6 +804,7 @@ statext_mcv_deserialize(bytea *data) Size datalen; char *dataptr; char *valuesptr; + char *isnullptr; if (data == NULL) return NULL; @@ -797,7 +821,10 @@ statext_mcv_deserialize(bytea *data) mcvlist = (MCVList *) palloc0(offsetof(MCVList, items)); /* initialize pointer to the data part (skip the varlena header) */ - ptr = VARDATA_ANY(data); + raw = palloc(VARSIZE_ANY_EXHDR(data)); + ptr = raw; + + memcpy(raw, VARDATA_ANY(data), VARSIZE_ANY_EXHDR(data)); /* get the header and perform further sanity checks */ memcpy(mcvlist, ptr, offsetof(MCVList, items)); @@ -848,7 +875,7 @@ statext_mcv_deserialize(bytea *data) /* Now it's safe to access the dimension info. 
info = (DimensionInfo *) ptr; - ptr += ndims * sizeof(DimensionInfo); + ptr += MAXALIGN(ndims * sizeof(DimensionInfo)); /* account for the value arrays */ for (dim = 0; dim < ndims; dim++) @@ -860,7 +887,7 @@ statext_mcv_deserialize(bytea *data) Assert(info[dim].nvalues >= 0); Assert(info[dim].nbytes >= 0); - expected_size += info[dim].nbytes; + expected_size += MAXALIGN(info[dim].nbytes); } /* @@ -890,7 +917,7 @@ statext_mcv_deserialize(bytea *data) /* space needed for a copy of data for by-ref types */ if (!info[dim].typbyval) - datalen += info[dim].nbytes; + datalen += MAXALIGN(info[dim].nbytes); } /* @@ -899,19 +926,25 @@ statext_mcv_deserialize(bytea *data) * original data - it may disappear while we're still using the MCV list, * e.g. due to catcache release. Only needed for by-ref types. */ - mcvlen = offsetof(MCVList, items) + - +(sizeof(MCVItem) * nitems) /* array of MCVItem */ - + ((sizeof(Datum) + sizeof(bool)) * ndims * nitems) + - +datalen; /* by-ref data */ + mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems)); + + /* arrays of values and isnull flags for all MCV items */ + mcvlen += MAXALIGN(sizeof(Datum) * ndims * nitems); + mcvlen += MAXALIGN(sizeof(bool) * ndims * nitems); + /* we don't quite need to align this, but it makes some asserts easier */ + mcvlen += MAXALIGN(datalen); + + /* now resize the deserialized MCV list, and compute pointers to parts */ mcvlist = repalloc(mcvlist, mcvlen); - /* pointer to the beginning of values/isnull space */ - valuesptr = (char *) mcvlist + offsetof(MCVList, items) - + (sizeof(MCVItem) * nitems); + /* pointer to the beginning of values/isnull arrays */ + valuesptr = (char *) mcvlist + + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems)); + + isnullptr = valuesptr + (MAXALIGN(sizeof(Datum) * ndims * nitems)); - /* get pointer where to store the data */ - dataptr = (char *) mcvlist + (mcvlen - datalen); + dataptr = isnullptr + (MAXALIGN(sizeof(bool) * ndims * nitems)); /* * 
Build mapping (index => value) for translating the serialized data into @@ -963,11 +996,11 @@ statext_mcv_deserialize(bytea *data) Size len = VARSIZE_ANY(ptr); memcpy(dataptr, ptr, len); - ptr += len; + ptr += MAXALIGN(len); /* just point into the array */ map[dim][i] = PointerGetDatum(dataptr); - dataptr += len; + dataptr += MAXALIGN(len); } } else if (info[dim].typlen == -2) @@ -978,11 +1011,11 @@ statext_mcv_deserialize(bytea *data) Size len = (strlen(ptr) + 1); /* don't forget the \0 */ memcpy(dataptr, ptr, len); - ptr += len; + ptr += MAXALIGN(len); /* just point into the array */ map[dim][i] = PointerGetDatum(dataptr); - dataptr += len; + dataptr += MAXALIGN(len); } } @@ -995,6 +1028,9 @@ statext_mcv_deserialize(bytea *data) /* check we consumed input data for this dimension exactly */ Assert(ptr == (start + info[dim].nbytes)); + + /* ensure proper alignment of the data */ + ptr = raw + MAXALIGN(ptr - raw); } /* we should have also filled the MCV list exactly */ @@ -1027,16 +1063,18 @@ statext_mcv_deserialize(bytea *data) ptr += ITEM_SIZE(ndims); /* check we're not overflowing the input */ - Assert(ptr <= (char *) data + VARSIZE_ANY(data)); + Assert(ptr <= (char *) raw + VARSIZE_ANY_EXHDR(data)); } /* check that we processed all the data */ - Assert(ptr == (char *) data + VARSIZE_ANY(data)); + Assert(ptr == raw + VARSIZE_ANY_EXHDR(data)); /* release the buffers used for mapping */ for (dim = 0; dim < ndims; dim++) pfree(map[dim]); + pfree(map); + pfree(raw); return mcvlist; } diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index f01087614e4..485cf422d96 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201903271 +#define CATALOG_VERSION_NO 201903291 #endif |