diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2019-02-15 23:22:27 -0500 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2019-02-15 23:22:33 -0500 |
commit | 02a6a54ecd6632f974b1b4eebfb2373363431084 (patch) | |
tree | 1e746052ab43122831974a8e38f3458cdf651505 /src/backend/nodes/bitmapset.c | |
parent | 72880ac182c8f3769f0be868f77166994282cb2a (diff) | |
download | postgresql-02a6a54ecd6632f974b1b4eebfb2373363431084.tar.gz postgresql-02a6a54ecd6632f974b1b4eebfb2373363431084.zip |
Make use of compiler builtins and/or assembly for CLZ, CTZ, POPCNT.
Test for the compiler builtins __builtin_clz, __builtin_ctz, and
__builtin_popcount, and make use of these in preference to
handwritten C code if they're available. Create src/port
infrastructure for "leftmost one", "rightmost one", and "popcount"
so as to centralize these decisions.
On x86_64, __builtin_popcount generally won't make use of the POPCNT
opcode because that's not universally supported yet. Provide code
that checks CPUID and then calls POPCNT via asm() if available.
This requires indirecting through a function pointer, which is
an annoying amount of overhead for a one-instruction operation,
but it's probably not worth working harder than this for our
current use-cases.
I'm not sure we've found all the existing places that could profit
from this new infrastructure; but we at least touched all the
ones that used copied-and-pasted versions of the bitmapset.c code,
and got rid of multiple copies of the associated constant arrays.
While at it, replace c-compiler.m4's one-per-builtin-function
macros with a single one that can handle all the cases we need
to worry about so far. Also, because I'm paranoid, make those
checks into AC_LINK checks rather than just AC_COMPILE; the
former coding failed to verify that libgcc has support for the
builtin, in cases where it's not inline code.
David Rowley, Thomas Munro, Alvaro Herrera, Tom Lane
Discussion: https://postgr.es/m/CAKJS1f9WTAGG1tPeJnD18hiQW5gAk59fQ6WK-vfdAKEHyRg2RA@mail.gmail.com
Diffstat (limited to 'src/backend/nodes/bitmapset.c')
-rw-r--r-- | src/backend/nodes/bitmapset.c | 130 |
1 files changed, 21 insertions, 109 deletions
diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index 62cd00903c0..54f8567c01c 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -22,6 +22,7 @@ #include "access/hash.h" #include "nodes/pg_list.h" +#include "port/pg_bitutils.h" #define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD) @@ -51,79 +52,18 @@ #define HAS_MULTIPLE_ONES(x) ((bitmapword) RIGHTMOST_ONE(x) != (x)) - -/* - * Lookup tables to avoid need for bit-by-bit groveling - * - * rightmost_one_pos[x] gives the bit number (0-7) of the rightmost one bit - * in a nonzero byte value x. The entry for x=0 is never used. - * - * leftmost_one_pos[x] gives the bit number (0-7) of the leftmost one bit in a - * nonzero byte value x. The entry for x=0 is never used. - * - * number_of_ones[x] gives the number of one-bits (0-8) in a byte value x. - * - * We could make these tables larger and reduce the number of iterations - * in the functions that use them, but bytewise shifts and masks are - * especially fast on many machines, so working a byte at a time seems best. - */ - -static const uint8 rightmost_one_pos[256] = { - 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 -}; - -static const uint8 leftmost_one_pos[256] = { - 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 -}; - -static const uint8 number_of_ones[256] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 -}; +/* Select appropriate bit-twiddling functions for bitmap word size */ +#if BITS_PER_BITMAPWORD == 32 +#define bmw_leftmost_one_pos(w) pg_leftmost_one_pos32(w) +#define bmw_rightmost_one_pos(w) pg_rightmost_one_pos32(w) +#define bmw_popcount(w) pg_popcount32(w) +#elif BITS_PER_BITMAPWORD == 64 +#define bmw_leftmost_one_pos(w) pg_leftmost_one_pos64(w) +#define bmw_rightmost_one_pos(w) pg_rightmost_one_pos64(w) +#define bmw_popcount(w) pg_popcount64(w) +#else +#error "invalid BITS_PER_BITMAPWORD" +#endif /* @@ -607,12 +547,7 @@ bms_singleton_member(const Bitmapset *a) if (result >= 0 || HAS_MULTIPLE_ONES(w)) elog(ERROR, "bitmapset has multiple members"); result = wordnum * BITS_PER_BITMAPWORD; - while ((w & 255) == 0) - { - w >>= 8; - result += 8; - } - result += rightmost_one_pos[w & 255]; + result += bmw_rightmost_one_pos(w); } } if (result < 0) @@ -650,12 +585,7 @@ bms_get_singleton_member(const Bitmapset *a, int *member) if (result >= 0 || HAS_MULTIPLE_ONES(w)) return false; result = wordnum * BITS_PER_BITMAPWORD; - while ((w & 255) == 0) - { - w >>= 8; - result += 8; - } - result += rightmost_one_pos[w & 255]; + result += bmw_rightmost_one_pos(w); } } if (result < 0) @@ -681,12 +611,9 @@ bms_num_members(const Bitmapset *a) { bitmapword w = a->words[wordnum]; - /* we assume here that bitmapword is an unsigned type */ - while (w != 0) - { - result += number_of_ones[w & 255]; - w >>= 8; - } + /* No need to count the bits in a zero word */ + if (w != 0) + result += bmw_popcount(w); } return result; } @@ -1041,12 +968,7 @@ bms_first_member(Bitmapset *a) a->words[wordnum] &= ~w; result = wordnum * BITS_PER_BITMAPWORD; - while ((w & 255) == 0) - { - w >>= 8; - result += 8; - } - result += rightmost_one_pos[w & 255]; + result += bmw_rightmost_one_pos(w); return result; } } @@ -1096,12 +1018,7 @@ bms_next_member(const Bitmapset *a, int prevbit) int result; result = wordnum * BITS_PER_BITMAPWORD; - while ((w & 255) == 0) - { - w >>= 8; - result += 8; - } - result += rightmost_one_pos[w & 255]; + result += bmw_rightmost_one_pos(w); return result; } @@ -1168,14 +1085,9 @@ bms_prev_member(const Bitmapset *a, int prevbit) if (w != 0) { int result; - int shift = BITS_PER_BITMAPWORD - 8; result = wordnum * BITS_PER_BITMAPWORD; - - while ((w >> shift) == 0) - shift -= 8; - - result += shift + leftmost_one_pos[(w >> shift) & 255]; + result += bmw_leftmost_one_pos(w); return result; } |