From 9aab83fc5039d83e84144b7bed3fb1d62a74ae78 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 13 May 2017 15:14:39 -0400 Subject: Redesign get_attstatsslot()/free_attstatsslot() for more safety and speed. The mess cleaned up in commit da0759600 is clear evidence that it's a bug hazard to expect the caller of get_attstatsslot()/free_attstatsslot() to provide the correct type OID for the array elements in the slot. Moreover, we weren't even getting any performance benefit from that, since get_attstatsslot() was extracting the real type OID from the array anyway. So we ought to get rid of that requirement; indeed, it would make more sense for get_attstatsslot() to pass back the type OID it found, in case the caller isn't sure what to expect, which is likely in binary-compatible-operator cases. Another problem with the current implementation is that if the stats array element type is pass-by-reference, we incur a palloc/memcpy/pfree cycle for each element. That seemed acceptable when the code was written because we were targeting O(10) array sizes --- but these days, stats arrays are almost always bigger than that, sometimes much bigger. We can save a significant number of cycles by doing one palloc/memcpy/pfree of the whole array. Indeed, in the now-probably-common case where the array is toasted, that happens anyway so this method is basically free. (Note: although the catcache code will inline any out-of-line toasted values, it doesn't decompress them. At the other end of the size range, it doesn't expand short-header datums either. In either case, DatumGetArrayTypeP would have to make a copy. We do end up using an extra array copy step if the element type is pass-by-value and the array length is neither small enough for a short header nor large enough to have suffered compression. But that seems like a very acceptable price for winning in pass-by-ref cases.) Hence, redesign to take these insights into account. 
While at it, convert to an API in which we fill a struct rather than passing a bunch of pointers to individual output arguments. That will make it less painful if we ever want further expansion of what get_attstatsslot can pass back. It's certainly arguable that this is new development and not something to push post-feature-freeze. However, I view it as primarily bug-proofing and therefore something that's better to have sooner not later. Since we aren't quite at beta phase yet, let's put it in. Discussion: https://postgr.es/m/16364.1494520862@sss.pgh.pa.us --- src/backend/utils/adt/array_selfuncs.c | 85 +++++++++++++--------------------- 1 file changed, 31 insertions(+), 54 deletions(-) (limited to 'src/backend/utils/adt/array_selfuncs.c') diff --git a/src/backend/utils/adt/array_selfuncs.c b/src/backend/utils/adt/array_selfuncs.c index cfaf87335a8..3ae6018c67e 100644 --- a/src/backend/utils/adt/array_selfuncs.c +++ b/src/backend/utils/adt/array_selfuncs.c @@ -137,35 +137,22 @@ scalararraysel_containment(PlannerInfo *root, statistic_proc_security_check(&vardata, cmpfunc->fn_oid)) { Form_pg_statistic stats; - Datum *values; - int nvalues; - float4 *numbers; - int nnumbers; - float4 *hist; - int nhist; + AttStatsSlot sslot; + AttStatsSlot hslot; stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); /* MCELEM will be an array of same type as element */ - if (get_attstatsslot(vardata.statsTuple, - elemtype, vardata.atttypmod, + if (get_attstatsslot(&sslot, vardata.statsTuple, STATISTIC_KIND_MCELEM, InvalidOid, - NULL, - &values, &nvalues, - &numbers, &nnumbers)) + ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS)) { /* For ALL case, also get histogram of distinct-element counts */ if (useOr || - !get_attstatsslot(vardata.statsTuple, - elemtype, vardata.atttypmod, + !get_attstatsslot(&hslot, vardata.statsTuple, STATISTIC_KIND_DECHIST, InvalidOid, - NULL, - NULL, NULL, - &hist, &nhist)) - { - hist = NULL; - nhist = 0; - } + ATTSTATSSLOT_NUMBERS)) + memset(&hslot, 0, 
sizeof(hslot)); /* * For = ANY, estimate as var @> ARRAY[const]. @@ -173,22 +160,26 @@ scalararraysel_containment(PlannerInfo *root, * For = ALL, estimate as var <@ ARRAY[const]. */ if (useOr) - selec = mcelem_array_contain_overlap_selec(values, nvalues, - numbers, nnumbers, + selec = mcelem_array_contain_overlap_selec(sslot.values, + sslot.nvalues, + sslot.numbers, + sslot.nnumbers, &constval, 1, OID_ARRAY_CONTAINS_OP, cmpfunc); else - selec = mcelem_array_contained_selec(values, nvalues, - numbers, nnumbers, + selec = mcelem_array_contained_selec(sslot.values, + sslot.nvalues, + sslot.numbers, + sslot.nnumbers, &constval, 1, - hist, nhist, + hslot.numbers, + hslot.nnumbers, OID_ARRAY_CONTAINED_OP, cmpfunc); - if (hist) - free_attstatsslot(elemtype, NULL, 0, hist, nhist); - free_attstatsslot(elemtype, values, nvalues, numbers, nnumbers); + free_attstatsslot(&hslot); + free_attstatsslot(&sslot); } else { @@ -369,49 +360,35 @@ calc_arraycontsel(VariableStatData *vardata, Datum constval, statistic_proc_security_check(vardata, cmpfunc->fn_oid)) { Form_pg_statistic stats; - Datum *values; - int nvalues; - float4 *numbers; - int nnumbers; - float4 *hist; - int nhist; + AttStatsSlot sslot; + AttStatsSlot hslot; stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple); /* MCELEM will be an array of same type as column */ - if (get_attstatsslot(vardata->statsTuple, - elemtype, vardata->atttypmod, + if (get_attstatsslot(&sslot, vardata->statsTuple, STATISTIC_KIND_MCELEM, InvalidOid, - NULL, - &values, &nvalues, - &numbers, &nnumbers)) + ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS)) { /* * For "array <@ const" case we also need histogram of distinct * element counts. 
*/ if (operator != OID_ARRAY_CONTAINED_OP || - !get_attstatsslot(vardata->statsTuple, - elemtype, vardata->atttypmod, + !get_attstatsslot(&hslot, vardata->statsTuple, STATISTIC_KIND_DECHIST, InvalidOid, - NULL, - NULL, NULL, - &hist, &nhist)) - { - hist = NULL; - nhist = 0; - } + ATTSTATSSLOT_NUMBERS)) + memset(&hslot, 0, sizeof(hslot)); /* Use the most-common-elements slot for the array Var. */ selec = mcelem_array_selec(array, typentry, - values, nvalues, - numbers, nnumbers, - hist, nhist, + sslot.values, sslot.nvalues, + sslot.numbers, sslot.nnumbers, + hslot.numbers, hslot.nnumbers, operator, cmpfunc); - if (hist) - free_attstatsslot(elemtype, NULL, 0, hist, nhist); - free_attstatsslot(elemtype, values, nvalues, numbers, nnumbers); + free_attstatsslot(&hslot); + free_attstatsslot(&sslot); } else { -- cgit v1.2.3