From dd7a8f66ed278eef2f001a98e2312336c61ee527 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Sat, 25 Jul 2015 14:39:00 -0400
Subject: Redesign tablesample method API, and do extensive code review.

The original implementation of TABLESAMPLE modeled the tablesample method
API on index access methods, which wasn't a good choice because, without
specialized DDL commands, there's no way to build an extension that can
implement a TSM. (Raw inserts into system catalogs are not an acceptable
thing to do, because we can't undo them during DROP EXTENSION, nor will
pg_upgrade behave sanely.) Instead adopt an API more like procedural
language handlers or foreign data wrappers, wherein the only SQL-level
support object needed is a single handler function identified by having
a special return type. This lets us get rid of the supporting catalog
altogether, so that no custom DDL support is needed for the feature.

Adjust the API so that it can support non-constant tablesample arguments
(the original coding assumed we could evaluate the argument expressions at
ExecInitSampleScan time, which is undesirable even if it weren't outright
unsafe), and discourage sampling methods from looking at invisible tuples.
Make sure that the BERNOULLI and SYSTEM methods are genuinely repeatable
within and across queries, as required by the SQL standard, and deal more
honestly with methods that can't support that requirement.

Make a full code-review pass over the tablesample additions, and fix
assorted bugs, omissions, infelicities, and cosmetic issues (such as
failure to put the added code stanzas in a consistent ordering).
Improve EXPLAIN's output of tablesample plans, too.

Back-patch to 9.5 so that we don't have to support the original API
in production.
--- src/backend/parser/parse_func.c | 144 ---------------------------------------- 1 file changed, 144 deletions(-) (limited to 'src/backend/parser/parse_func.c') diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c index 430baff1165..554ca9d8c47 100644 --- a/src/backend/parser/parse_func.c +++ b/src/backend/parser/parse_func.c @@ -18,7 +18,6 @@ #include "catalog/pg_aggregate.h" #include "catalog/pg_proc.h" #include "catalog/pg_type.h" -#include "catalog/pg_tablesample_method.h" #include "funcapi.h" #include "lib/stringinfo.h" #include "nodes/makefuncs.h" @@ -27,7 +26,6 @@ #include "parser/parse_clause.h" #include "parser/parse_coerce.h" #include "parser/parse_func.h" -#include "parser/parse_expr.h" #include "parser/parse_relation.h" #include "parser/parse_target.h" #include "parser/parse_type.h" @@ -769,148 +767,6 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, } -/* - * ParseTableSample - * - * Parse TABLESAMPLE clause and process the arguments - */ -TableSampleClause * -ParseTableSample(ParseState *pstate, char *samplemethod, Node *repeatable, - List *sampleargs, int location) -{ - HeapTuple tuple; - Form_pg_tablesample_method tsm; - Form_pg_proc procform; - TableSampleClause *tablesample; - List *fargs; - ListCell *larg; - int nargs, - initnargs; - Oid init_arg_types[FUNC_MAX_ARGS]; - - /* Load the tablesample method */ - tuple = SearchSysCache1(TABLESAMPLEMETHODNAME, PointerGetDatum(samplemethod)); - if (!HeapTupleIsValid(tuple)) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_OBJECT), - errmsg("tablesample method \"%s\" does not exist", - samplemethod), - parser_errposition(pstate, location))); - - tablesample = makeNode(TableSampleClause); - tablesample->tsmid = HeapTupleGetOid(tuple); - - tsm = (Form_pg_tablesample_method) GETSTRUCT(tuple); - - tablesample->tsmseqscan = tsm->tsmseqscan; - tablesample->tsmpagemode = tsm->tsmpagemode; - tablesample->tsminit = tsm->tsminit; - tablesample->tsmnextblock = 
tsm->tsmnextblock; - tablesample->tsmnexttuple = tsm->tsmnexttuple; - tablesample->tsmexaminetuple = tsm->tsmexaminetuple; - tablesample->tsmend = tsm->tsmend; - tablesample->tsmreset = tsm->tsmreset; - tablesample->tsmcost = tsm->tsmcost; - - ReleaseSysCache(tuple); - - /* Validate the parameters against init function definition. */ - tuple = SearchSysCache1(PROCOID, - ObjectIdGetDatum(tablesample->tsminit)); - - if (!HeapTupleIsValid(tuple)) /* should not happen */ - elog(ERROR, "cache lookup failed for function %u", - tablesample->tsminit); - - procform = (Form_pg_proc) GETSTRUCT(tuple); - initnargs = procform->pronargs; - Assert(initnargs >= 3); - - /* - * First parameter is used to pass the SampleScanState, second is seed - * (REPEATABLE), skip the processing for them here, just assert that the - * types are correct. - */ - Assert(procform->proargtypes.values[0] == INTERNALOID); - Assert(procform->proargtypes.values[1] == INT4OID); - initnargs -= 2; - memcpy(init_arg_types, procform->proargtypes.values + 2, - initnargs * sizeof(Oid)); - - /* Now we are done with the catalog */ - ReleaseSysCache(tuple); - - /* Process repeatable (seed) */ - if (repeatable != NULL) - { - Node *arg = repeatable; - - if (arg && IsA(arg, A_Const)) - { - A_Const *con = (A_Const *) arg; - - if (con->val.type == T_Null) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("REPEATABLE clause must be NOT NULL numeric value"), - parser_errposition(pstate, con->location))); - - } - - arg = transformExpr(pstate, arg, EXPR_KIND_FROM_FUNCTION); - arg = coerce_to_specific_type(pstate, arg, INT4OID, "REPEATABLE"); - tablesample->repeatable = arg; - } - else - tablesample->repeatable = NULL; - - /* Check user provided expected number of arguments. 
*/ - if (list_length(sampleargs) != initnargs) - ereport(ERROR, - (errcode(ERRCODE_TOO_MANY_ARGUMENTS), - errmsg_plural("tablesample method \"%s\" expects %d argument got %d", - "tablesample method \"%s\" expects %d arguments got %d", - initnargs, - samplemethod, - initnargs, list_length(sampleargs)), - parser_errposition(pstate, location))); - - /* Transform the arguments, typecasting them as needed. */ - fargs = NIL; - nargs = 0; - foreach(larg, sampleargs) - { - Node *inarg = (Node *) lfirst(larg); - Node *arg = transformExpr(pstate, inarg, EXPR_KIND_FROM_FUNCTION); - Oid argtype = exprType(arg); - - if (argtype != init_arg_types[nargs]) - { - if (!can_coerce_type(1, &argtype, &init_arg_types[nargs], - COERCION_IMPLICIT)) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("wrong parameter %d for tablesample method \"%s\"", - nargs + 1, samplemethod), - errdetail("Expected type %s got %s.", - format_type_be(init_arg_types[nargs]), - format_type_be(argtype)), - parser_errposition(pstate, exprLocation(inarg)))); - - arg = coerce_type(pstate, arg, argtype, init_arg_types[nargs], -1, - COERCION_IMPLICIT, COERCE_IMPLICIT_CAST, -1); - } - - fargs = lappend(fargs, arg); - nargs++; - } - - /* Pass the arguments down */ - tablesample->args = fargs; - - return tablesample; -} - /* func_match_argtypes() * * Given a list of candidate functions (having the right name and number -- cgit v1.2.3