aboutsummaryrefslogtreecommitdiff
path: root/src/include
diff options
context:
space:
mode:
Diffstat (limited to 'src/include')
-rw-r--r-- src/include/executor/hashjoin.h 40
-rw-r--r-- src/include/executor/nodeHash.h 8
-rw-r--r-- src/include/nodes/execnodes.h 10
-rw-r--r-- src/include/nodes/plannodes.h 13
4 files changed, 61 insertions, 10 deletions
diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h
index 40a5244ad47..5b18282a646 100644
--- a/src/include/executor/hashjoin.h
+++ b/src/include/executor/hashjoin.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.49 2009/01/01 17:23:59 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.50 2009/03/21 00:04:40 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -72,6 +72,36 @@ typedef struct HashJoinTupleData
#define HJTUPLE_MINTUPLE(hjtup) \
((MinimalTuple) ((char *) (hjtup) + HJTUPLE_OVERHEAD))
+/*
+ * If the outer relation's distribution is sufficiently nonuniform, we attempt
+ * to optimize the join by treating the hash values corresponding to the outer
+ * relation's MCVs specially. Inner relation tuples matching these hash
+ * values go into the "skew" hashtable instead of the main hashtable, and
+ * outer relation tuples with these hash values are matched against that
+ * table instead of the main one. Thus, tuples with these hash values are
+ * effectively handled as part of the first batch and will never go to disk.
+ * The skew hashtable is limited to SKEW_WORK_MEM_PERCENT of the total memory
+ * allowed for the join; while building the hashtables, we decrease the number
+ * of MCVs being specially treated if needed to stay under this limit.
+ *
+ * Note: you might wonder why we look at the outer relation stats for this,
+ * rather than the inner. One reason is that the outer relation is typically
+ * bigger, so we get more I/O savings by optimizing for its most common values.
+ * Also, for similarly-sized relations, the planner prefers to put the more
+ * uniformly distributed relation on the inside, so we're more likely to find
+ * interesting skew in the outer relation.
+ */
+typedef struct HashSkewBucket
+{
+ uint32 hashvalue; /* hash value handled by this bucket (one of the outer relation's MCV hash values) */
+ HashJoinTuple tuples; /* linked list of inner-relation tuples having this hash value */
+} HashSkewBucket;
+
+#define SKEW_BUCKET_OVERHEAD MAXALIGN(sizeof(HashSkewBucket)) /* MAXALIGN'd size of one HashSkewBucket */
+#define INVALID_SKEW_BUCKET_NO (-1) /* sentinel meaning "no skew bucket"; NOTE(review): presumably what ExecHashGetSkewBucket returns on a miss -- confirm in nodeHash.c */
+#define SKEW_WORK_MEM_PERCENT 2 /* percent of the join's allowed memory that the skew hashtable may use */
+#define SKEW_MIN_OUTER_FRACTION 0.01 /* NOTE(review): presumably the minimum fraction of the outer relation the MCVs must cover for skew optimization to be attempted -- confirm in nodeHash.c */
+
typedef struct HashJoinTableData
{
@@ -82,6 +112,12 @@ typedef struct HashJoinTableData
struct HashJoinTupleData **buckets;
/* buckets array is per-batch storage, as are all the tuples */
+ bool skewEnabled; /* are we using skew optimization? */
+ HashSkewBucket **skewBucket; /* hashtable of skew buckets */
+ int skewBucketLen; /* size of skewBucket array (a power of 2!) */
+ int nSkewBuckets; /* number of active skew buckets */
+ int *skewBucketNums; /* array indexes of active skew buckets */
+
int nbatch; /* number of batches */
int curbatch; /* current batch #; 0 during 1st pass */
@@ -113,6 +149,8 @@ typedef struct HashJoinTableData
Size spaceUsed; /* memory space currently used by tuples */
Size spaceAllowed; /* upper limit for space used */
+ Size spaceUsedSkew; /* skew hash table's current space usage */
+ Size spaceAllowedSkew; /* upper limit for skew hashtable */
MemoryContext hashCxt; /* context for whole-hash-join storage */
MemoryContext batchCxt; /* context for this-batch-only storage */
diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h
index ae08880d6d3..7c8ca568a6d 100644
--- a/src/include/executor/nodeHash.h
+++ b/src/include/executor/nodeHash.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.46 2009/01/01 17:23:59 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.47 2009/03/21 00:04:40 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -41,8 +41,10 @@ extern void ExecHashGetBucketAndBatch(HashJoinTable hashtable,
extern HashJoinTuple ExecScanHashBucket(HashJoinState *hjstate,
ExprContext *econtext);
extern void ExecHashTableReset(HashJoinTable hashtable);
-extern void ExecChooseHashTableSize(double ntuples, int tupwidth,
+extern void ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
int *numbuckets,
- int *numbatches);
+ int *numbatches,
+ int *num_skew_mcvs);
+extern int ExecHashGetSkewBucket(HashJoinTable hashtable, uint32 hashvalue);
#endif /* NODEHASH_H */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 8d87ec19e1d..996efa8f87d 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.201 2009/01/12 05:10:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.202 2009/03/21 00:04:40 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1374,11 +1374,12 @@ typedef struct MergeJoinState
* hj_HashTable hash table for the hashjoin
* (NULL if table not built yet)
* hj_CurHashValue hash value for current outer tuple
- * hj_CurBucketNo bucket# for current outer tuple
+ * hj_CurBucketNo regular bucket# for current outer tuple
+ * hj_CurSkewBucketNo skew bucket# for current outer tuple
* hj_CurTuple last inner tuple matched to current outer
* tuple, or NULL if starting search
- * (CurHashValue, CurBucketNo and CurTuple are
- * undefined if OuterTupleSlot is empty!)
+ * (hj_CurXXX variables are undefined if
+ * OuterTupleSlot is empty!)
* hj_OuterHashKeys the outer hash keys in the hashjoin condition
* hj_InnerHashKeys the inner hash keys in the hashjoin condition
* hj_HashOperators the join operators in the hashjoin condition
@@ -1403,6 +1404,7 @@ typedef struct HashJoinState
HashJoinTable hj_HashTable;
uint32 hj_CurHashValue;
int hj_CurBucketNo;
+ int hj_CurSkewBucketNo;
HashJoinTuple hj_CurTuple;
List *hj_OuterHashKeys; /* list of ExprState nodes */
List *hj_InnerHashKeys; /* list of ExprState nodes */
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 12742b57e55..9caf0895e4e 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/nodes/plannodes.h,v 1.108 2009/01/01 17:24:00 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/plannodes.h,v 1.109 2009/03/21 00:04:40 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -460,7 +460,7 @@ typedef struct MergeJoin
} MergeJoin;
/* ----------------
- * hash join (probe) node
+ * hash join node
* ----------------
*/
typedef struct HashJoin
@@ -567,11 +567,20 @@ typedef struct Unique
/* ----------------
* hash build node
+ *
+ * If the executor is supposed to try to apply skew join optimization, then
+ * skewTable/skewColumn identify the outer relation's join key column, from
+ * which the relevant MCV statistics can be fetched. Also, its type
+ * information is provided to save a lookup.
* ----------------
*/
typedef struct Hash
{
Plan plan;
+ Oid skewTable; /* OID of the table holding the outer join key column, or InvalidOid (presumably when skew optimization is not to be tried) */
+ AttrNumber skewColumn; /* column # of the outer join key within skewTable, or zero */
+ Oid skewColType; /* datatype of the outer key column (provided to save a lookup) */
+ int32 skewColTypmod; /* typmod of the outer key column (provided to save a lookup) */
/* all other info is in the parent HashJoin node */
} Hash;