diff options
author | Alvaro Herrera <alvherre@alvh.no-ip.org> | 2018-11-16 14:54:15 -0300 |
---|---|---|
committer | Alvaro Herrera <alvherre@alvh.no-ip.org> | 2018-11-16 15:01:05 -0300 |
commit | 3f2393edefa5ef2b6970a5a2fa2c7e9c55cc10cf (patch) | |
tree | e80c043f882bfdbd7ded4575d0041c14c924153b /src/include/executor/execPartition.h | |
parent | a387a3dff9001225ad571ff2755d139f5bd193b3 (diff) | |
download | postgresql-3f2393edefa5ef2b6970a5a2fa2c7e9c55cc10cf.tar.gz postgresql-3f2393edefa5ef2b6970a5a2fa2c7e9c55cc10cf.zip |
Redesign initialization of partition routing structures
This speeds up write operations (INSERT, UPDATE, DELETE, COPY, as well
as the future MERGE) on partitioned tables.
This changes the setup for tuple routing so that it does far less work
during the initial setup and pushes more work out to when partitions
receive tuples. A PartitionDispatchData struct for a sub-partitioned
table is only created when a tuple gets routed through it. The
possibly large arrays in the PartitionTupleRouting struct have largely
been removed. The partitions[] array remains but now never contains any
NULL gaps. Previously the NULLs had to be skipped during
ExecCleanupTupleRouting(), which could add a large overhead to the
cleanup when the number of partitions was large. The partitions[] array
is allocated small to start with and only enlarged when we route tuples
to enough partitions that it runs out of space. This allows us to keep
simple single-row partition INSERTs running quickly.
The arrays in PartitionTupleRouting which stored the tuple translation maps
have now been removed. These have been moved out into a
PartitionRoutingInfo struct which is an additional field in ResultRelInfo.
The find_all_inheritors() call still remains by far the slowest part of
ExecSetupPartitionTupleRouting(). This commit just removes the other slow
parts.
In passing also rename the tuple translation maps from being ParentToChild
and ChildToParent to being RootToPartition and PartitionToRoot. The old
names mislead you into thinking that a partition of some sub-partitioned
table would translate to the rowtype of the sub-partitioned table rather
than the root partitioned table.
Authors: David Rowley and Amit Langote, heavily revised by Álvaro Herrera
Testing help from Jesper Pedersen and Kato Sho.
Discussion: https://postgr.es/m/CAKJS1f_1RJyFquuCKRFHTdcXqoPX-PYqAd7nz=GVBwvGh4a6xA@mail.gmail.com
Diffstat (limited to 'src/include/executor/execPartition.h')
-rw-r--r-- | src/include/executor/execPartition.h | 105 |
1 files changed, 28 insertions, 77 deletions
diff --git a/src/include/executor/execPartition.h b/src/include/executor/execPartition.h index 3e08104ea42..d3cfb55f9f1 100644 --- a/src/include/executor/execPartition.h +++ b/src/include/executor/execPartition.h @@ -18,74 +18,36 @@ #include "nodes/plannodes.h" #include "partitioning/partprune.h" -/* See execPartition.c for the definition. */ +/* See execPartition.c for the definitions. */ typedef struct PartitionDispatchData *PartitionDispatch; +typedef struct PartitionTupleRouting PartitionTupleRouting; -/*----------------------- - * PartitionTupleRouting - Encapsulates all information required to execute - * tuple-routing between partitions. +/* + * PartitionRoutingInfo * - * partition_dispatch_info Array of PartitionDispatch objects with one - * entry for every partitioned table in the - * partition tree. - * num_dispatch number of partitioned tables in the partition - * tree (= length of partition_dispatch_info[]) - * partition_oids Array of leaf partitions OIDs with one entry - * for every leaf partition in the partition tree, - * initialized in full by - * ExecSetupPartitionTupleRouting. - * partitions Array of ResultRelInfo* objects with one entry - * for every leaf partition in the partition tree, - * initialized lazily by ExecInitPartitionInfo. - * num_partitions Number of leaf partitions in the partition tree - * (= 'partitions_oid'/'partitions' array length) - * parent_child_tupconv_maps Array of TupleConversionMap objects with one - * entry for every leaf partition (required to - * convert tuple from the root table's rowtype to - * a leaf partition's rowtype after tuple routing - * is done) - * child_parent_tupconv_maps Array of TupleConversionMap objects with one - * entry for every leaf partition (required to - * convert an updated tuple from the leaf - * partition's rowtype to the root table's rowtype - * so that tuple routing can be done) - * child_parent_map_not_required Array of bool. 
True value means that a map is - * determined to be not required for the given - * partition. False means either we haven't yet - * checked if a map is required, or it was - * determined to be required. - * subplan_partition_offsets Integer array ordered by UPDATE subplans. Each - * element of this array has the index into the - * corresponding partition in partitions array. - * num_subplan_partition_offsets Length of 'subplan_partition_offsets' array - * partition_tuple_slots Array of TupleTableSlot objects; if non-NULL, - * contains one entry for every leaf partition, - * of which only those of the leaf partitions - * whose attribute numbers differ from the root - * parent have a non-NULL value. NULL if all of - * the partitions encountered by a given command - * happen to have same rowtype as the root parent - * root_tuple_slot TupleTableSlot to be used to transiently hold - * copy of a tuple that's being moved across - * partitions in the root partitioned table's - * rowtype - *----------------------- + * Additional result relation information specific to routing tuples to a + * table partition. */ -typedef struct PartitionTupleRouting +typedef struct PartitionRoutingInfo { - PartitionDispatch *partition_dispatch_info; - int num_dispatch; - Oid *partition_oids; - ResultRelInfo **partitions; - int num_partitions; - TupleConversionMap **parent_child_tupconv_maps; - TupleConversionMap **child_parent_tupconv_maps; - bool *child_parent_map_not_required; - int *subplan_partition_offsets; - int num_subplan_partition_offsets; - TupleTableSlot **partition_tuple_slots; - TupleTableSlot *root_tuple_slot; -} PartitionTupleRouting; + /* + * Map for converting tuples in root partitioned table format into + * partition format, or NULL if no conversion is required. + */ + TupleConversionMap *pi_RootToPartitionMap; + + /* + * Map for converting tuples in partition format into the root partitioned + * table format, or NULL if no conversion is required. 
+ */ + TupleConversionMap *pi_PartitionToRootMap; + + /* + * Slot to store tuples in partition format, or NULL when no translation + * is required between root and partition. + */ + TupleTableSlot *pi_PartitionTupleSlot; +} PartitionRoutingInfo; /* * PartitionedRelPruningData - Per-partitioned-table data for run-time pruning @@ -175,22 +137,11 @@ typedef struct PartitionPruneState extern PartitionTupleRouting *ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel); -extern int ExecFindPartition(ResultRelInfo *resultRelInfo, - PartitionDispatch *pd, +extern ResultRelInfo *ExecFindPartition(ModifyTableState *mtstate, + ResultRelInfo *rootResultRelInfo, + PartitionTupleRouting *proute, TupleTableSlot *slot, EState *estate); -extern ResultRelInfo *ExecInitPartitionInfo(ModifyTableState *mtstate, - ResultRelInfo *resultRelInfo, - PartitionTupleRouting *proute, - EState *estate, int partidx); -extern void ExecInitRoutingInfo(ModifyTableState *mtstate, - EState *estate, - PartitionTupleRouting *proute, - ResultRelInfo *partRelInfo, - int partidx); -extern void ExecSetupChildParentMapForLeaf(PartitionTupleRouting *proute); -extern TupleConversionMap *TupConvMapForLeaf(PartitionTupleRouting *proute, - ResultRelInfo *rootRelInfo, int leaf_index); extern void ExecCleanupTupleRouting(ModifyTableState *mtstate, PartitionTupleRouting *proute); extern PartitionPruneState *ExecCreatePartitionPruneState(PlanState *planstate, |