diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile.global.in | 3 | ||||
-rw-r--r-- | src/include/port/pg_bitutils.h | 176 | ||||
-rw-r--r-- | src/port/Makefile | 16 | ||||
-rw-r--r-- | src/port/pg_bitutils.c | 378 | ||||
-rw-r--r-- | src/port/pg_bitutils_hwpopcnt.c | 36 |
5 files changed, 261 insertions, 348 deletions
diff --git a/src/Makefile.global.in b/src/Makefile.global.in index aa16da3e0f2..0f4dd195845 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -517,6 +517,9 @@ WIN32_STACK_RLIMIT=4194304 # Set if we have a working win32 crashdump header have_win32_dbghelp = @have_win32_dbghelp@ +# Set if __builtin_popcount() is supported by $(CC) +have__builtin_popcount = @have__builtin_popcount@ + # Pull in platform-specific magic include $(top_builddir)/src/Makefile.port diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h index 148c5550573..70aae5128fa 100644 --- a/src/include/port/pg_bitutils.h +++ b/src/include/port/pg_bitutils.h @@ -10,17 +10,177 @@ * *------------------------------------------------------------------------ - */ - #ifndef PG_BITUTILS_H #define PG_BITUTILS_H -extern int (*pg_popcount32) (uint32 word); -extern int (*pg_popcount64) (uint64 word); -extern int (*pg_rightmost_one32) (uint32 word); -extern int (*pg_rightmost_one64) (uint64 word); -extern int (*pg_leftmost_one32) (uint32 word); -extern int (*pg_leftmost_one64) (uint64 word); - +extern int (*pg_popcount32) (uint32 word); +extern int (*pg_popcount64) (uint64 word); extern uint64 pg_popcount(const char *buf, int bytes); +/* in pg_bitutils_hwpopcnt.c */ +extern int pg_popcount32_hw(uint32 word); +extern int pg_popcount64_hw(uint64 word); + + +#ifndef HAVE__BUILTIN_CTZ +/* + * Array marking the position of the right-most set bit for each value of + * 1-255. We count the right-most position as the 0th bit, and the + * left-most the 7th bit. The 0th index of the array must not be used. + */ +static const uint8 rightmost_one_pos[256] = { + 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 +}; +#endif /* !HAVE__BUILTIN_CTZ */ + +/* + * pg_rightmost_one32 + * Returns the number of trailing 0-bits in word, starting at the least + * significant bit position. word must not be 0. + */ +static inline int +pg_rightmost_one32(uint32 word) +{ + int result = 0; + + Assert(word != 0); + +#ifdef HAVE__BUILTIN_CTZ + result = __builtin_ctz(word); +#else + while ((word & 255) == 0) + { + word >>= 8; + result += 8; + } + result += rightmost_one_pos[word & 255]; +#endif /* HAVE__BUILTIN_CTZ */ + + return result; +} + +/* + * pg_rightmost_one64 + * Returns the number of trailing 0-bits in word, starting at the least + * significant bit position. word must not be 0. + */ +static inline int +pg_rightmost_one64(uint64 word) +{ + int result = 0; + + Assert(word != 0); +#ifdef HAVE__BUILTIN_CTZ +#if defined(HAVE_LONG_INT_64) + return __builtin_ctzl(word); +#elif defined(HAVE_LONG_LONG_INT_64) + return __builtin_ctzll(word); +#else +#error must have a working 64-bit integer datatype +#endif +#else /* HAVE__BUILTIN_CTZ */ + while ((word & 255) == 0) + { + word >>= 8; + result += 8; + } + result += rightmost_one_pos[word & 255]; +#endif + + return result; +} + +#ifndef HAVE__BUILTIN_CLZ +/* + * Array marking the position of the left-most set bit for each value of + * 1-255. We count the right-most position as the 0th bit, and the + * left-most the 7th bit. The 0th index of the array must not be used. + */ +static const uint8 leftmost_one_pos[256] = { + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 +}; +#endif /* !HAVE_BUILTIN_CLZ */ + +/* + * pg_leftmost_one32 + * Returns the 0-based position of the most significant set bit in word + * measured from the least significant bit. word must not be 0. + */ +static inline int +pg_leftmost_one32(uint32 word) +{ +#ifdef HAVE__BUILTIN_CLZ + Assert(word != 0); + + return 31 - __builtin_clz(word); +#else + int shift = 32 - 8; + + Assert(word != 0); + + while ((word >> shift) == 0) + shift -= 8; + + return shift + leftmost_one_pos[(word >> shift) & 255]; +#endif /* HAVE__BUILTIN_CLZ */ +} + +/* + * pg_leftmost_one64 + * Returns the 0-based position of the most significant set bit in word + * measured from the least significant bit. word must not be 0. + */ +static inline int +pg_leftmost_one64(uint64 word) +{ +#ifdef HAVE__BUILTIN_CLZ + Assert(word != 0); +#if defined(HAVE_LONG_INT_64) + return 63 - __builtin_clzl(word); +#elif defined(HAVE_LONG_LONG_INT_64) + return 63 - __builtin_clzll(word); +#else +#error must have a working 64-bit integer datatype +#endif +#else /* HAVE__BUILTIN_CLZ */ + int shift = 64 - 8; + + Assert(word != 0); + while ((word >> shift) == 0) + shift -= 8; + + return shift + leftmost_one_pos[(word >> shift) & 255]; +#endif /* !HAVE__BUIILTIN_CLZ */ +} + #endif /* PG_BITUTILS_H */ diff --git a/src/port/Makefile b/src/port/Makefile index 2da73260a13..a7f8fd2e668 100644 --- a/src/port/Makefile +++ b/src/port/Makefile @@ -41,6 +41,14 @@ OBJS = $(LIBOBJS) $(PG_CRC32C_OBJS) chklocale.o erand48.o inet_net_ntop.o \ qsort.o qsort_arg.o quotes.o snprintf.o sprompt.o strerror.o \ tar.o thread.o +# If the compiler supports a special flag for the POPCOUNT instruction and it +# has __builtin_popcount, add pg_bitutils_hwpopcnt.o. +ifneq ($(CFLAGS_POPCNT),) +ifeq ($(have__builtin_popcount),yes) +OBJS += pg_bitutils_hwpopcnt.o +endif +endif + # libpgport.a, libpgport_shlib.a, and libpgport_srv.a contain the same files # foo.o, foo_shlib.o, and foo_srv.o are all built from foo.c OBJS_SHLIB = $(OBJS:%.o=%_shlib.o) @@ -78,10 +86,10 @@ pg_crc32c_armv8.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C) pg_crc32c_armv8_shlib.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C) pg_crc32c_armv8_srv.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C) -# pg_bitutils.c needs CFLAGS_POPCNT -pg_bitutils.o: CFLAGS+=$(CFLAGS_POPCNT) -pg_bitutils_shlib.o: CFLAGS+=$(CFLAGS_POPCNT) -pg_bitutils_srv.o: CFLAGS+=$(CFLAGS_POPCNT) +# all versions of pg_bitutils_hwpopcnt.c need CFLAGS_POPCNT +pg_bitutils_hwpopcnt.o: CFLAGS+=$(CFLAGS_POPCNT) +pg_bitutils_hwpopcnt_shlib.o: CFLAGS+=$(CFLAGS_POPCNT) +pg_bitutils_hwpopcnt_srv.o: CFLAGS+=$(CFLAGS_POPCNT) # # Shared library versions of object files diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c index aac394fe927..97bfcebe4e1 100644 --- a/src/port/pg_bitutils.c +++ b/src/port/pg_bitutils.c @@ -10,7 +10,6 @@ * *------------------------------------------------------------------------- */ - #include "postgres.h" #ifdef HAVE__GET_CPUID @@ -23,61 +22,21 @@ #include "port/pg_bitutils.h" -#if defined(HAVE__BUILTIN_POPCOUNT) && defined(HAVE__GET_CPUID) +#ifdef HAVE__BUILTIN_POPCOUNT static bool pg_popcount_available(void); -static int pg_popcount32_choose(uint32 word); -static int pg_popcount32_sse42(uint32 word); -static int pg_popcount64_choose(uint64 word); -static int pg_popcount64_sse42(uint64 word); -#endif -static int pg_popcount32_slow(uint32 word); -static int pg_popcount64_slow(uint64 word); - -#if defined(HAVE__GET_CPUID) && (defined(HAVE__BUILTIN_CTZ) || defined(HAVE__BUILTIN_CLZ)) -static bool pg_lzcnt_available(void); -#endif - -#if defined(HAVE__BUILTIN_CTZ) && defined(HAVE__GET_CPUID) -static int pg_rightmost_one32_choose(uint32 word); -static int pg_rightmost_one32_abm(uint32 word); -static int pg_rightmost_one64_choose(uint64 word); -static int pg_rightmost_one64_abm(uint64 word); -#endif -static int pg_rightmost_one32_slow(uint32 word); -static int pg_rightmost_one64_slow(uint64 word); - -#if defined(HAVE__BUILTIN_CLZ) && defined(HAVE__GET_CPUID) -static int pg_leftmost_one32_choose(uint32 word); -static int pg_leftmost_one32_abm(uint32 word); -static int pg_leftmost_one64_choose(uint64 word); -static int pg_leftmost_one64_abm(uint64 word); -#endif -static int pg_leftmost_one32_slow(uint32 word); -static int pg_leftmost_one64_slow(uint64 word); - -#if defined(HAVE__BUILTIN_POPCOUNT) && defined(HAVE__GET_CPUID) -int (*pg_popcount32) (uint32 word) = pg_popcount32_choose; -int (*pg_popcount64) (uint64 word) = pg_popcount64_choose; -#else -int (*pg_popcount32) (uint32 word) = pg_popcount32_slow; -int (*pg_popcount64) (uint64 word) = pg_popcount64_slow; -#endif - -#if defined(HAVE__BUILTIN_CTZ) && defined(HAVE__GET_CPUID) -int (*pg_rightmost_one32) (uint32 word) = pg_rightmost_one32_choose; -int (*pg_rightmost_one64) (uint64 word) = pg_rightmost_one64_choose; +static int pg_popcount32_choose(uint32 word); +static int pg_popcount32_builtin(uint32 word); +static int pg_popcount64_choose(uint64 word); +static int pg_popcount64_builtin(uint64 word); +int (*pg_popcount32) (uint32 word) = pg_popcount32_choose; +int (*pg_popcount64) (uint64 word) = pg_popcount64_choose; #else -int (*pg_rightmost_one32) (uint32 word) = pg_rightmost_one32_slow; -int (*pg_rightmost_one64) (uint64 word) = pg_rightmost_one64_slow; -#endif +static int pg_popcount32_slow(uint32 word); +static int pg_popcount64_slow(uint64 word); +int (*pg_popcount32) (uint32 word) = pg_popcount32_slow; +int (*pg_popcount64) (uint64 word) = pg_popcount64_slow; +#endif /* !HAVE_BUILTIN_POPCOUNT */ -#if defined(HAVE__BUILTIN_CLZ) && defined(HAVE__GET_CPUID) -int (*pg_leftmost_one32) (uint32 word) = pg_leftmost_one32_choose; -int (*pg_leftmost_one64) (uint64 word) = pg_leftmost_one64_choose; -#else -int (*pg_leftmost_one32) (uint32 word) = pg_leftmost_one32_slow; -int (*pg_leftmost_one64) (uint64 word) = pg_leftmost_one64_slow; -#endif /* Array marking the number of 1-bits for each value of 0-255. */ static const uint8 number_of_ones[256] = { @@ -100,96 +59,51 @@ static const uint8 number_of_ones[256] = { }; /* - * Array marking the position of the right-most set bit for each value of - * 1-255. We count the right-most position as the 0th bit, and the - * left-most the 7th bit. The 0th index of the array must not be used. + * Return true iff we have CPUID support and it indicates that the POPCNT + * instruction is available. */ -static const uint8 rightmost_one_pos[256] = { - 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 -}; - -/* - * Array marking the position of the left-most set bit for each value of - * 1-255. We count the right-most position as the 0th bit, and the - * left-most the 7th bit. The 0th index of the array must not be used. - */ -static const uint8 leftmost_one_pos[256] = { - 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 -}; - -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_POPCOUNT) - static bool pg_popcount_available(void) { - unsigned int exx[4] = { 0, 0, 0, 0 }; +#if defined(HAVE__GET_CPUID) || defined(HAVE__CPUID) + unsigned int exx[4] = {0, 0, 0, 0}; #if defined(HAVE__GET_CPUID) __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); #elif defined(HAVE__CPUID) __cpuid(exx, 1); -#else -#error cpuid instruction not available #endif return (exx[2] & (1 << 23)) != 0; /* POPCNT */ -} -#endif +#else /* HAVE__GET_CPUID || HAVE__CPUID */ -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_POPCOUNT) + return false; +#endif +} +#ifdef HAVE__BUILTIN_POPCOUNT /* - * This gets called on the first call. It replaces the function pointer - * so that subsequent calls are routed directly to the chosen implementation. + * This gets called on the first call to pg_popcount32. It replaces the + * function pointer so that subsequent calls are routed directly to the chosen + * implementation. */ static int pg_popcount32_choose(uint32 word) { if (pg_popcount_available()) - pg_popcount32 = pg_popcount32_sse42; + pg_popcount32 = pg_popcount32_hw; else - pg_popcount32 = pg_popcount32_slow; + pg_popcount32 = pg_popcount32_builtin; return pg_popcount32(word); } static int -pg_popcount32_sse42(uint32 word) +pg_popcount32_builtin(uint32 word) { return __builtin_popcount(word); } -#endif - +#else /* HAVE__BUILTIN_POPCOUNT */ /* * pg_popcount32_slow * Return the number of 1 bits set in word @@ -197,7 +111,7 @@ pg_popcount32_sse42(uint32 word) static int pg_popcount32_slow(uint32 word) { - int result = 0; + int result = 0; while (word != 0) { @@ -207,6 +121,7 @@ pg_popcount32_slow(uint32 word) return result; } +#endif /* * pg_popcount @@ -215,13 +130,13 @@ pg_popcount32_slow(uint32 word) uint64 pg_popcount(const char *buf, int bytes) { - uint64 popcnt = 0; + uint64 popcnt = 0; #if SIZEOF_VOID_P >= 8 /* Process in 64-bit chunks if the buffer is aligned. */ if (buf == (char *) TYPEALIGN(8, buf)) { - uint64 *words = (uint64 *) buf; + uint64 *words = (uint64 *) buf; while (bytes >= 8) { @@ -235,7 +150,7 @@ pg_popcount(const char *buf, int bytes) /* Process in 32-bit chunks if the buffer is aligned. */ if (buf == (char *) TYPEALIGN(4, buf)) { - uint32 *words = (uint32 *) buf; + uint32 *words = (uint32 *) buf; while (bytes >= 4) { @@ -254,38 +169,36 @@ pg_popcount(const char *buf, int bytes) return popcnt; } -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_POPCOUNT) - +#ifdef HAVE__BUILTIN_POPCOUNT /* - * This gets called on the first call. It replaces the function pointer - * so that subsequent calls are routed directly to the chosen implementation. + * This gets called on the first call to pg_popcount64. It replaces the + * function pointer so that subsequent calls are routed directly to the chosen + * implementation. */ static int pg_popcount64_choose(uint64 word) { if (pg_popcount_available()) - pg_popcount64 = pg_popcount64_sse42; + pg_popcount64 = pg_popcount64_hw; else - pg_popcount64 = pg_popcount64_slow; + pg_popcount64 = pg_popcount64_builtin; return pg_popcount64(word); } static int -pg_popcount64_sse42(uint64 word) +pg_popcount64_builtin(uint64 word) { #if defined(HAVE_LONG_INT_64) return __builtin_popcountl(word); #elif defined(HAVE_LONG_LONG_INT_64) return __builtin_popcountll(word); #else - /* shouldn't happen */ #error must have a working 64-bit integer datatype #endif } -#endif - +#else /* HAVE__BUILTIN_POPCOUNT */ /* * pg_popcount64_slow * Return the number of 1 bits set in word @@ -293,7 +206,7 @@ pg_popcount64_sse42(uint64 word) static int pg_popcount64_slow(uint64 word) { - int result = 0; + int result = 0; while (word != 0) { @@ -303,211 +216,4 @@ pg_popcount64_slow(uint64 word) return result; } - -#if defined(HAVE__GET_CPUID) && (defined(HAVE__BUILTIN_CTZ) || defined(HAVE__BUILTIN_CLZ)) - -static bool -pg_lzcnt_available(void) -{ - - unsigned int exx[4] = { 0, 0, 0, 0 }; - -#if defined(HAVE__GET_CPUID) - __get_cpuid(0x80000001, &exx[0], &exx[1], &exx[2], &exx[3]); -#elif defined(HAVE__CPUID) - __cpuid(exx, 0x80000001); -#else -#error cpuid instruction not available -#endif - - return (exx[2] & (1 << 5)) != 0; /* LZCNT */ -} -#endif - -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CTZ) -/* - * This gets called on the first call. It replaces the function pointer - * so that subsequent calls are routed directly to the chosen implementation. - */ -static int -pg_rightmost_one32_choose(uint32 word) -{ - if (pg_lzcnt_available()) - pg_rightmost_one32 = pg_rightmost_one32_abm; - else - pg_rightmost_one32 = pg_rightmost_one32_slow; - - return pg_rightmost_one32(word); -} - -static int -pg_rightmost_one32_abm(uint32 word) -{ - return __builtin_ctz(word); -} - #endif - -/* - * pg_rightmost_one32_slow - * Returns the number of trailing 0-bits in word, starting at the least - * significant bit position. word must not be 0. - */ -static int -pg_rightmost_one32_slow(uint32 word) -{ - int result = 0; - - Assert(word != 0); - - while ((word & 255) == 0) - { - word >>= 8; - result += 8; - } - result += rightmost_one_pos[word & 255]; - - return result; -} - -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CTZ) -/* - * This gets called on the first call. It replaces the function pointer - * so that subsequent calls are routed directly to the chosen implementation. - */ -static int -pg_rightmost_one64_choose(uint64 word) -{ - if (pg_lzcnt_available()) - pg_rightmost_one64 = pg_rightmost_one64_abm; - else - pg_rightmost_one64 = pg_rightmost_one64_slow; - - return pg_rightmost_one64(word); -} - -static int -pg_rightmost_one64_abm(uint64 word) -{ -#if defined(HAVE_LONG_INT_64) - return __builtin_ctzl(word); -#elif defined(HAVE_LONG_LONG_INT_64) - return __builtin_ctzll(word); -#else - /* shouldn't happen */ -#error must have a working 64-bit integer datatype -#endif -} -#endif - -/* - * pg_rightmost_one64_slow - * Returns the number of trailing 0-bits in word, starting at the least - * significant bit position. word must not be 0. - */ -static int -pg_rightmost_one64_slow(uint64 word) -{ - int result = 0; - - Assert(word != 0); - - while ((word & 255) == 0) - { - word >>= 8; - result += 8; - } - result += rightmost_one_pos[word & 255]; - - return result; -} - -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CLZ) -/* - * This gets called on the first call. It replaces the function pointer - * so that subsequent calls are routed directly to the chosen implementation. - */ -static int -pg_leftmost_one32_choose(uint32 word) -{ - if (pg_lzcnt_available()) - pg_leftmost_one32 = pg_leftmost_one32_abm; - else - pg_leftmost_one32 = pg_leftmost_one32_slow; - - return pg_leftmost_one32(word); -} - -static int -pg_leftmost_one32_abm(uint32 word) -{ - return 31 - __builtin_clz(word); -} -#endif - -/* - * pg_leftmost_one32_slow - * Returns the 0-based position of the most significant set bit in word - * measured from the least significant bit. word must not be 0. - */ -static int -pg_leftmost_one32_slow(uint32 word) -{ - int shift = 32 - 8; - - Assert(word != 0); - - while ((word >> shift) == 0) - shift -= 8; - - return shift + leftmost_one_pos[(word >> shift) & 255]; -} - -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CLZ) -/* - * This gets called on the first call. It replaces the function pointer - * so that subsequent calls are routed directly to the chosen implementation. - */ -static int -pg_leftmost_one64_choose(uint64 word) -{ - if (pg_lzcnt_available()) - pg_leftmost_one64 = pg_leftmost_one64_abm; - else - pg_leftmost_one64 = pg_leftmost_one64_slow; - - return pg_leftmost_one64(word); -} - -static int -pg_leftmost_one64_abm(uint64 word) -{ -#if defined(HAVE_LONG_INT_64) - return 63 - __builtin_clzl(word); -#elif defined(HAVE_LONG_LONG_INT_64) - return 63 - __builtin_clzll(word); -#else - /* shouldn't happen */ -#error must have a working 64-bit integer datatype -#endif - -} -#endif - -/* - * pg_leftmost_one64_slow - * Returns the 0-based position of the most significant set bit in word - * measured from the least significant bit. word must not be 0. - */ -static int -pg_leftmost_one64_slow(uint64 word) -{ - int shift = 64 - 8; - - Assert(word != 0); - - while ((word >> shift) == 0) - shift -= 8; - - return shift + leftmost_one_pos[(word >> shift) & 255]; -} diff --git a/src/port/pg_bitutils_hwpopcnt.c b/src/port/pg_bitutils_hwpopcnt.c new file mode 100644 index 00000000000..516efd586dd --- /dev/null +++ b/src/port/pg_bitutils_hwpopcnt.c @@ -0,0 +1,36 @@ +/*------------------------------------------------------------------------- + * + * pg_bitutils_hwpopcnt.c + * CPU-optimized implementation of pg_popcount variants + * + * This file must be compiled with a compiler-specific flag to enable the + * POPCNT instruction. + * + * Portions Copyright (c) 2019, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/port/pg_bitutils_hwpopcnt.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "port/pg_bitutils.h" + +int +pg_popcount32_hw(uint32 word) +{ + return __builtin_popcount(word); +} + +int +pg_popcount64_hw(uint64 word) +{ +#if defined(HAVE_LONG_INT_64) + return __builtin_popcountl(word); +#elif defined(HAVE_LONG_LONG_INT_64) + return __builtin_popcountll(word); +#else +#error must have a working 64-bit integer datatype +#endif +} |