Optimize joins when the inner relation can be proven unique.

If there can certainly be no more than one matching inner row for a given outer row, then the executor can move on to the next outer row as soon as it's found one match; there's no need to continue scanning the inner relation for this outer row. This saves useless scanning in nestloop and hash joins. In merge joins, it offers the opportunity to skip mark/restore processing, because we know we have not advanced past the first possible match for the next outer row. Of course, the devil is in the details: the proof of uniqueness must depend only on joinquals (not otherquals), and if we want to skip mergejoin mark/restore then it must depend only on merge clauses. To avoid adding more planning overhead than absolutely necessary, the present patch errs in the conservative direction: there are cases where inner_unique or skip_mark_restore processing could be used, but it will not do so because it's not sure that the uniqueness proof depended only on "safe" clauses. This could be improved later. David Rowley, reviewed and rather heavily editorialized on by me Discussion: https://postgr.es/m/CAApHDvqF6Sw-TK98bW48TdtFJ+3a7D2mFyZ7++=D-RyPsL76gw@mail.gmail.com
author: Tom Lane <tgl@sss.pgh.pa.us> 2017-04-07 22:20:03 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us> 2017-04-07 22:20:13 -0400
commit: 9c7f5229ad68d7e0e4dd149e3f80257893e404d4 (patch)
tree: 0a167d403952550f43941b01b24ed5e7526c5351 /src/backend/executor/nodeMergejoin.c
parent: f13a9121f9822eafe05cc3178bf046155a248173 (diff)
download: postgresql-9c7f5229ad68d7e0e4dd149e3f80257893e404d4.tar.gz
postgresql-9c7f5229ad68d7e0e4dd149e3f80257893e404d4.zip
1 files changed, 40 insertions, 16 deletions
diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c
index 62784af304b..8c483bf4747 100644
--- a/src/backend/executor/nodeMergejoin.c
+++ b/src/backend/executor/nodeMergejoin.c
@@ -802,10 +802,11 @@ ExecMergeJoin(MergeJoinState *node)
 					}
 
 					/*
-					 * In a semijoin, we'll consider returning the first
-					 * match, but after that we're done with this outer tuple.
+					 * If we only need to join to the first matching inner
+					 * tuple, then consider returning this one, but after that
+					 * continue with next outer tuple.
 					 */
-					if (node->js.jointype == JOIN_SEMI)
+					if (node->js.single_match)
 						node->mj_JoinState = EXEC_MJ_NEXTOUTER;
 
 					qualResult = (otherqual == NULL ||
@@ -1050,6 +1051,10 @@ ExecMergeJoin(MergeJoinState *node)
 					 * scan position to the first mark, and go join that tuple
 					 * (and any following ones) to the new outer.
 					 *
+					 * If we were able to determine mark and restore are not
+					 * needed, then we don't have to back up; the current
+					 * inner is already the first possible match.
+					 *
 					 * NOTE: we do not need to worry about the MatchedInner
 					 * state for the rescanned inner tuples.  We know all of
 					 * them will match this new outer tuple and therefore
@@ -1062,16 +1067,19 @@ ExecMergeJoin(MergeJoinState *node)
 					 * forcing the merge clause to never match, so we never
 					 * get here.
 					 */
-					ExecRestrPos(innerPlan);
+					if (!node->mj_SkipMarkRestore)
+					{
+						ExecRestrPos(innerPlan);
 
-					/*
-					 * ExecRestrPos probably should give us back a new Slot,
-					 * but since it doesn't, use the marked slot.  (The
-					 * previously returned mj_InnerTupleSlot cannot be assumed
-					 * to hold the required tuple.)
-					 */
-					node->mj_InnerTupleSlot = innerTupleSlot;
-					/* we need not do MJEvalInnerValues again */
+						/*
+						 * ExecRestrPos probably should give us back a new
+						 * Slot, but since it doesn't, use the marked slot.
+						 * (The previously returned mj_InnerTupleSlot cannot
+						 * be assumed to hold the required tuple.)
+						 */
+						node->mj_InnerTupleSlot = innerTupleSlot;
+						/* we need not do MJEvalInnerValues again */
+					}
 
 					node->mj_JoinState = EXEC_MJ_JOINTUPLES;
 				}
@@ -1172,7 +1180,8 @@ ExecMergeJoin(MergeJoinState *node)
 
 				if (compareResult == 0)
 				{
-					ExecMarkPos(innerPlan);
+					if (!node->mj_SkipMarkRestore)
+						ExecMarkPos(innerPlan);
 
 					MarkInnerTuple(node->mj_InnerTupleSlot, node);
 
@@ -1466,11 +1475,18 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags)
 	/*
 	 * initialize child nodes
 	 *
-	 * inner child must support MARK/RESTORE.
+	 * inner child must support MARK/RESTORE, unless we have detected that we
+	 * don't need that.  Note that skip_mark_restore must never be set if
+	 * there are non-mergeclause joinquals, since the logic wouldn't work.
 	 */
+	Assert(node->join.joinqual == NIL || !node->skip_mark_restore);
+	mergestate->mj_SkipMarkRestore = node->skip_mark_restore;
+
 	outerPlanState(mergestate) = ExecInitNode(outerPlan(node), estate, eflags);
 	innerPlanState(mergestate) = ExecInitNode(innerPlan(node), estate,
-											  eflags | EXEC_FLAG_MARK);
+											  mergestate->mj_SkipMarkRestore ?
+											  eflags :
+											  (eflags | EXEC_FLAG_MARK));
 
 	/*
 	 * For certain types of inner child nodes, it is advantageous to issue
@@ -1483,7 +1499,8 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags)
 	 * only if eflags doesn't specify REWIND.
 	 */
 	if (IsA(innerPlan(node), Material) &&
-		(eflags & EXEC_FLAG_REWIND) == 0)
+		(eflags & EXEC_FLAG_REWIND) == 0 &&
+		!mergestate->mj_SkipMarkRestore)
 		mergestate->mj_ExtraMarks = true;
 	else
 		mergestate->mj_ExtraMarks = false;
@@ -1497,6 +1514,13 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags)
 	ExecSetSlotDescriptor(mergestate->mj_MarkedTupleSlot,
 						  ExecGetResultType(innerPlanState(mergestate)));
 
+	/*
+	 * detect whether we need only consider the first matching inner tuple
+	 */
+	mergestate->js.single_match = (node->join.inner_unique ||
+								   node->join.jointype == JOIN_SEMI);
+
+	/* set up null tuples for outer joins, if needed */
 	switch (node->join.jointype)
 	{
 		case JOIN_INNER:
author	Tom Lane <tgl@sss.pgh.pa.us>	2017-04-07 22:20:03 -0400
committer	Tom Lane <tgl@sss.pgh.pa.us>	2017-04-07 22:20:13 -0400
commit	9c7f5229ad68d7e0e4dd149e3f80257893e404d4 (patch)
tree	0a167d403952550f43941b01b24ed5e7526c5351 /src/backend/executor/nodeMergejoin.c
parent	f13a9121f9822eafe05cc3178bf046155a248173 (diff)
download	postgresql-9c7f5229ad68d7e0e4dd149e3f80257893e404d4.tar.gz postgresql-9c7f5229ad68d7e0e4dd149e3f80257893e404d4.zip