aboutsummaryrefslogtreecommitdiff
path: root/src/include/executor/execPartition.h
diff options
context:
space:
mode:
author Alvaro Herrera <alvherre@alvh.no-ip.org> 2018-11-16 14:54:15 -0300
committer Alvaro Herrera <alvherre@alvh.no-ip.org> 2018-11-16 15:01:05 -0300
commit 3f2393edefa5ef2b6970a5a2fa2c7e9c55cc10cf (patch)
tree e80c043f882bfdbd7ded4575d0041c14c924153b /src/include/executor/execPartition.h
parent a387a3dff9001225ad571ff2755d139f5bd193b3 (diff)
download postgresql-3f2393edefa5ef2b6970a5a2fa2c7e9c55cc10cf.tar.gz
postgresql-3f2393edefa5ef2b6970a5a2fa2c7e9c55cc10cf.zip
Redesign initialization of partition routing structures
This speeds up write operations (INSERT, UPDATE, DELETE, COPY, as well as the future MERGE) on partitioned tables. This changes the setup for tuple routing so that it does far less work during the initial setup and pushes more work out to when partitions receive tuples. PartitionDispatchData structs for sub-partitioned tables are only created when a tuple gets routed through them. The possibly large arrays in the PartitionTupleRouting struct have largely been removed. The partitions[] array remains but now never contains any NULL gaps. Previously the NULLs had to be skipped during ExecCleanupTupleRouting(), which could add a large overhead to the cleanup when the number of partitions was large. The partitions[] array is allocated small to start with and only enlarged when we route tuples to enough partitions that it runs out of space. This allows us to keep simple single-row partition INSERTs running quickly. The arrays in PartitionTupleRouting which stored the tuple translation maps have now been removed. These have been moved out into a PartitionRoutingInfo struct which is an additional field in ResultRelInfo. The find_all_inheritors() call still remains by far the slowest part of ExecSetupPartitionTupleRouting(). This commit just removes the other slow parts. In passing also rename the tuple translation maps from being ParentToChild and ChildToParent to being RootToPartition and PartitionToRoot. The old names mislead you into thinking that a partition of some sub-partitioned table would translate to the rowtype of the sub-partitioned table rather than the root partitioned table. Authors: David Rowley and Amit Langote, heavily revised by Álvaro Herrera Testing help from Jesper Pedersen and Kato Sho. Discussion: https://postgr.es/m/CAKJS1f_1RJyFquuCKRFHTdcXqoPX-PYqAd7nz=GVBwvGh4a6xA@mail.gmail.com
Diffstat (limited to 'src/include/executor/execPartition.h')
-rw-r--r-- src/include/executor/execPartition.h 105
1 files changed, 28 insertions, 77 deletions
diff --git a/src/include/executor/execPartition.h b/src/include/executor/execPartition.h
index 3e08104ea42..d3cfb55f9f1 100644
--- a/src/include/executor/execPartition.h
+++ b/src/include/executor/execPartition.h
@@ -18,74 +18,36 @@
#include "nodes/plannodes.h"
#include "partitioning/partprune.h"
-/* See execPartition.c for the definition. */
+/* See execPartition.c for the definitions. */
typedef struct PartitionDispatchData *PartitionDispatch;
+typedef struct PartitionTupleRouting PartitionTupleRouting;
-/*-----------------------
- * PartitionTupleRouting - Encapsulates all information required to execute
- * tuple-routing between partitions.
+/*
+ * PartitionRoutingInfo
*
- * partition_dispatch_info Array of PartitionDispatch objects with one
- * entry for every partitioned table in the
- * partition tree.
- * num_dispatch number of partitioned tables in the partition
- * tree (= length of partition_dispatch_info[])
- * partition_oids Array of leaf partitions OIDs with one entry
- * for every leaf partition in the partition tree,
- * initialized in full by
- * ExecSetupPartitionTupleRouting.
- * partitions Array of ResultRelInfo* objects with one entry
- * for every leaf partition in the partition tree,
- * initialized lazily by ExecInitPartitionInfo.
- * num_partitions Number of leaf partitions in the partition tree
- * (= 'partitions_oid'/'partitions' array length)
- * parent_child_tupconv_maps Array of TupleConversionMap objects with one
- * entry for every leaf partition (required to
- * convert tuple from the root table's rowtype to
- * a leaf partition's rowtype after tuple routing
- * is done)
- * child_parent_tupconv_maps Array of TupleConversionMap objects with one
- * entry for every leaf partition (required to
- * convert an updated tuple from the leaf
- * partition's rowtype to the root table's rowtype
- * so that tuple routing can be done)
- * child_parent_map_not_required Array of bool. True value means that a map is
- * determined to be not required for the given
- * partition. False means either we haven't yet
- * checked if a map is required, or it was
- * determined to be required.
- * subplan_partition_offsets Integer array ordered by UPDATE subplans. Each
- * element of this array has the index into the
- * corresponding partition in partitions array.
- * num_subplan_partition_offsets Length of 'subplan_partition_offsets' array
- * partition_tuple_slots Array of TupleTableSlot objects; if non-NULL,
- * contains one entry for every leaf partition,
- * of which only those of the leaf partitions
- * whose attribute numbers differ from the root
- * parent have a non-NULL value. NULL if all of
- * the partitions encountered by a given command
- * happen to have same rowtype as the root parent
- * root_tuple_slot TupleTableSlot to be used to transiently hold
- * copy of a tuple that's being moved across
- * partitions in the root partitioned table's
- * rowtype
- *-----------------------
+ * Additional result relation information specific to routing tuples to a
+ * table partition.
*/
-typedef struct PartitionTupleRouting
+typedef struct PartitionRoutingInfo
{
- PartitionDispatch *partition_dispatch_info;
- int num_dispatch;
- Oid *partition_oids;
- ResultRelInfo **partitions;
- int num_partitions;
- TupleConversionMap **parent_child_tupconv_maps;
- TupleConversionMap **child_parent_tupconv_maps;
- bool *child_parent_map_not_required;
- int *subplan_partition_offsets;
- int num_subplan_partition_offsets;
- TupleTableSlot **partition_tuple_slots;
- TupleTableSlot *root_tuple_slot;
-} PartitionTupleRouting;
+ /*
+ * Map for converting tuples in root partitioned table format into
+ * partition format, or NULL if no conversion is required.
+ */
+ TupleConversionMap *pi_RootToPartitionMap;
+
+ /*
+ * Map for converting tuples in partition format into the root partitioned
+ * table format, or NULL if no conversion is required.
+ */
+ TupleConversionMap *pi_PartitionToRootMap;
+
+ /*
+ * Slot to store tuples in partition format, or NULL when no translation
+ * is required between root and partition.
+ */
+ TupleTableSlot *pi_PartitionTupleSlot;
+} PartitionRoutingInfo;
/*
* PartitionedRelPruningData - Per-partitioned-table data for run-time pruning
@@ -175,22 +137,11 @@ typedef struct PartitionPruneState
extern PartitionTupleRouting *ExecSetupPartitionTupleRouting(ModifyTableState *mtstate,
Relation rel);
-extern int ExecFindPartition(ResultRelInfo *resultRelInfo,
- PartitionDispatch *pd,
+extern ResultRelInfo *ExecFindPartition(ModifyTableState *mtstate,
+ ResultRelInfo *rootResultRelInfo,
+ PartitionTupleRouting *proute,
TupleTableSlot *slot,
EState *estate);
-extern ResultRelInfo *ExecInitPartitionInfo(ModifyTableState *mtstate,
- ResultRelInfo *resultRelInfo,
- PartitionTupleRouting *proute,
- EState *estate, int partidx);
-extern void ExecInitRoutingInfo(ModifyTableState *mtstate,
- EState *estate,
- PartitionTupleRouting *proute,
- ResultRelInfo *partRelInfo,
- int partidx);
-extern void ExecSetupChildParentMapForLeaf(PartitionTupleRouting *proute);
-extern TupleConversionMap *TupConvMapForLeaf(PartitionTupleRouting *proute,
- ResultRelInfo *rootRelInfo, int leaf_index);
extern void ExecCleanupTupleRouting(ModifyTableState *mtstate,
PartitionTupleRouting *proute);
extern PartitionPruneState *ExecCreatePartitionPruneState(PlanState *planstate,