diff options
Diffstat (limited to 'src/include')
-rw-r--r-- | src/include/executor/hashjoin.h | 40 |
-rw-r--r-- | src/include/executor/nodeHash.h | 8 |
-rw-r--r-- | src/include/nodes/execnodes.h | 10 |
-rw-r--r-- | src/include/nodes/plannodes.h | 13 |
4 files changed, 61 insertions, 10 deletions
diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h index 40a5244ad47..5b18282a646 100644 --- a/src/include/executor/hashjoin.h +++ b/src/include/executor/hashjoin.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.49 2009/01/01 17:23:59 momjian Exp $ + * $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.50 2009/03/21 00:04:40 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -72,6 +72,36 @@ typedef struct HashJoinTupleData #define HJTUPLE_MINTUPLE(hjtup) \ ((MinimalTuple) ((char *) (hjtup) + HJTUPLE_OVERHEAD)) +/* + * If the outer relation's distribution is sufficiently nonuniform, we attempt + * to optimize the join by treating the hash values corresponding to the outer + * relation's MCVs specially. Inner relation tuples matching these hash + * values go into the "skew" hashtable instead of the main hashtable, and + * outer relation tuples with these hash values are matched against that + * table instead of the main one. Thus, tuples with these hash values are + * effectively handled as part of the first batch and will never go to disk. + * The skew hashtable is limited to SKEW_WORK_MEM_PERCENT of the total memory + * allowed for the join; while building the hashtables, we decrease the number + * of MCVs being specially treated if needed to stay under this limit. + * + * Note: you might wonder why we look at the outer relation stats for this, + * rather than the inner. One reason is that the outer relation is typically + * bigger, so we get more I/O savings by optimizing for its most common values. + * Also, for similarly-sized relations, the planner prefers to put the more + * uniformly distributed relation on the inside, so we're more likely to find + * interesting skew in the outer relation. 
+ */ +typedef struct HashSkewBucket +{ + uint32 hashvalue; /* common hash value */ + HashJoinTuple tuples; /* linked list of inner-relation tuples */ +} HashSkewBucket; + +#define SKEW_BUCKET_OVERHEAD MAXALIGN(sizeof(HashSkewBucket)) +#define INVALID_SKEW_BUCKET_NO (-1) +#define SKEW_WORK_MEM_PERCENT 2 +#define SKEW_MIN_OUTER_FRACTION 0.01 + typedef struct HashJoinTableData { @@ -82,6 +112,12 @@ typedef struct HashJoinTableData struct HashJoinTupleData **buckets; /* buckets array is per-batch storage, as are all the tuples */ + bool skewEnabled; /* are we using skew optimization? */ + HashSkewBucket **skewBucket; /* hashtable of skew buckets */ + int skewBucketLen; /* size of skewBucket array (a power of 2!) */ + int nSkewBuckets; /* number of active skew buckets */ + int *skewBucketNums; /* array indexes of active skew buckets */ + int nbatch; /* number of batches */ int curbatch; /* current batch #; 0 during 1st pass */ @@ -113,6 +149,8 @@ typedef struct HashJoinTableData Size spaceUsed; /* memory space currently used by tuples */ Size spaceAllowed; /* upper limit for space used */ + Size spaceUsedSkew; /* skew hash table's current space usage */ + Size spaceAllowedSkew; /* upper limit for skew hashtable */ MemoryContext hashCxt; /* context for whole-hash-join storage */ MemoryContext batchCxt; /* context for this-batch-only storage */ diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h index ae08880d6d3..7c8ca568a6d 100644 --- a/src/include/executor/nodeHash.h +++ b/src/include/executor/nodeHash.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.46 2009/01/01 17:23:59 momjian Exp $ + * $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.47 2009/03/21 00:04:40 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -41,8 +41,10 
@@ extern void ExecHashGetBucketAndBatch(HashJoinTable hashtable, extern HashJoinTuple ExecScanHashBucket(HashJoinState *hjstate, ExprContext *econtext); extern void ExecHashTableReset(HashJoinTable hashtable); -extern void ExecChooseHashTableSize(double ntuples, int tupwidth, +extern void ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew, int *numbuckets, - int *numbatches); + int *numbatches, + int *num_skew_mcvs); +extern int ExecHashGetSkewBucket(HashJoinTable hashtable, uint32 hashvalue); #endif /* NODEHASH_H */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 8d87ec19e1d..996efa8f87d 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.201 2009/01/12 05:10:45 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.202 2009/03/21 00:04:40 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1374,11 +1374,12 @@ typedef struct MergeJoinState * hj_HashTable hash table for the hashjoin * (NULL if table not built yet) * hj_CurHashValue hash value for current outer tuple - * hj_CurBucketNo bucket# for current outer tuple + * hj_CurBucketNo regular bucket# for current outer tuple + * hj_CurSkewBucketNo skew bucket# for current outer tuple * hj_CurTuple last inner tuple matched to current outer * tuple, or NULL if starting search - * (CurHashValue, CurBucketNo and CurTuple are - * undefined if OuterTupleSlot is empty!) + * (hj_CurXXX variables are undefined if + * OuterTupleSlot is empty!) 
* hj_OuterHashKeys the outer hash keys in the hashjoin condition * hj_InnerHashKeys the inner hash keys in the hashjoin condition * hj_HashOperators the join operators in the hashjoin condition @@ -1403,6 +1404,7 @@ typedef struct HashJoinState HashJoinTable hj_HashTable; uint32 hj_CurHashValue; int hj_CurBucketNo; + int hj_CurSkewBucketNo; HashJoinTuple hj_CurTuple; List *hj_OuterHashKeys; /* list of ExprState nodes */ List *hj_InnerHashKeys; /* list of ExprState nodes */ diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 12742b57e55..9caf0895e4e 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/plannodes.h,v 1.108 2009/01/01 17:24:00 momjian Exp $ + * $PostgreSQL: pgsql/src/include/nodes/plannodes.h,v 1.109 2009/03/21 00:04:40 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -460,7 +460,7 @@ typedef struct MergeJoin } MergeJoin; /* ---------------- - * hash join (probe) node + * hash join node * ---------------- */ typedef struct HashJoin @@ -567,11 +567,20 @@ typedef struct Unique /* ---------------- * hash build node + * + * If the executor is supposed to try to apply skew join optimization, then + * skewTable/skewColumn identify the outer relation's join key column, from + * which the relevant MCV statistics can be fetched. Also, its type + * information is provided to save a lookup. * ---------------- */ typedef struct Hash { Plan plan; + Oid skewTable; /* outer join key's table OID, or InvalidOid */ + AttrNumber skewColumn; /* outer join key's column #, or zero */ + Oid skewColType; /* datatype of the outer key column */ + int32 skewColTypmod; /* typmod of the outer key column */ /* all other info is in the parent HashJoin node */ } Hash; |