aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Makefile.global.in5
-rw-r--r--src/include/c.h12
-rw-r--r--src/makefiles/meson.build4
-rw-r--r--src/port/Makefile12
-rw-r--r--src/port/meson.build7
-rw-r--r--src/port/pg_popcount_avx512.c86
-rw-r--r--src/port/pg_popcount_avx512_choose.c102
7 files changed, 101 insertions, 127 deletions
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index 4859343153b..0f38d712d15 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -262,9 +262,7 @@ CFLAGS_SL_MODULE = @CFLAGS_SL_MODULE@
CXXFLAGS_SL_MODULE = @CXXFLAGS_SL_MODULE@
CFLAGS_UNROLL_LOOPS = @CFLAGS_UNROLL_LOOPS@
CFLAGS_VECTORIZE = @CFLAGS_VECTORIZE@
-CFLAGS_POPCNT = @CFLAGS_POPCNT@
CFLAGS_CRC = @CFLAGS_CRC@
-CFLAGS_XSAVE = @CFLAGS_XSAVE@
PERMIT_DECLARATION_AFTER_STATEMENT = @PERMIT_DECLARATION_AFTER_STATEMENT@
PERMIT_MISSING_VARIABLE_DECLARATIONS = @PERMIT_MISSING_VARIABLE_DECLARATIONS@
CXXFLAGS = @CXXFLAGS@
@@ -772,9 +770,6 @@ LIBOBJS = @LIBOBJS@
# files needed for the chosen CRC-32C implementation
PG_CRC32C_OBJS = @PG_CRC32C_OBJS@
-# files needed for the chosen popcount implementation
-PG_POPCNT_OBJS = @PG_POPCNT_OBJS@
-
LIBS := -lpgcommon -lpgport $(LIBS)
# to make ws2_32.lib the last library
diff --git a/src/include/c.h b/src/include/c.h
index 55dec71a6d7..0a548d69d7f 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -175,6 +175,18 @@
#endif
/*
+ * pg_attribute_target allows specifying different target options that the
+ * function should be compiled with (e.g., for using special CPU instructions).
+ * Note that there still needs to be a configure-time check to verify that a
+ * specific target is understood by the compiler.
+ */
+#if __has_attribute (target)
+#define pg_attribute_target(...) __attribute__((target(__VA_ARGS__)))
+#else
+#define pg_attribute_target(...)
+#endif
+
+/*
* Append PG_USED_FOR_ASSERTS_ONLY to definitions of variables that are only
* used in assert-enabled builds, to avoid compiler warnings about unused
* variables in assert-disabled builds.
diff --git a/src/makefiles/meson.build b/src/makefiles/meson.build
index e13938fe8ad..aba7411a1be 100644
--- a/src/makefiles/meson.build
+++ b/src/makefiles/meson.build
@@ -102,10 +102,8 @@ pgxs_kv = {
' '.join(cflags_no_missing_var_decls),
'CFLAGS_CRC': ' '.join(cflags_crc),
- 'CFLAGS_POPCNT': ' '.join(cflags_popcnt),
'CFLAGS_UNROLL_LOOPS': ' '.join(unroll_loops_cflags),
'CFLAGS_VECTORIZE': ' '.join(vectorize_cflags),
- 'CFLAGS_XSAVE': ' '.join(cflags_xsave),
'LDFLAGS': var_ldflags,
'LDFLAGS_EX': var_ldflags_ex,
@@ -181,7 +179,7 @@ pgxs_empty = [
'WANTED_LANGUAGES',
# Not needed because we don't build the server / PLs with the generated makefile
- 'LIBOBJS', 'PG_CRC32C_OBJS', 'PG_POPCNT_OBJS', 'TAS',
+ 'LIBOBJS', 'PG_CRC32C_OBJS', 'TAS',
'PG_TEST_EXTRA',
'DTRACEFLAGS', # only server has dtrace probes
diff --git a/src/port/Makefile b/src/port/Makefile
index 9324ec2d9fc..366c814bd92 100644
--- a/src/port/Makefile
+++ b/src/port/Makefile
@@ -38,13 +38,13 @@ LIBS += $(PTHREAD_LIBS)
OBJS = \
$(LIBOBJS) \
$(PG_CRC32C_OBJS) \
- $(PG_POPCNT_OBJS) \
bsearch_arg.o \
chklocale.o \
inet_net_ntop.o \
noblock.o \
path.o \
pg_bitutils.o \
+ pg_popcount_avx512.o \
pg_strong_random.o \
pgcheckdir.o \
pgmkdirp.o \
@@ -92,16 +92,6 @@ pg_crc32c_armv8.o: CFLAGS+=$(CFLAGS_CRC)
pg_crc32c_armv8_shlib.o: CFLAGS+=$(CFLAGS_CRC)
pg_crc32c_armv8_srv.o: CFLAGS+=$(CFLAGS_CRC)
-# all versions of pg_popcount_avx512_choose.o need CFLAGS_XSAVE
-pg_popcount_avx512_choose.o: CFLAGS+=$(CFLAGS_XSAVE)
-pg_popcount_avx512_choose_shlib.o: CFLAGS+=$(CFLAGS_XSAVE)
-pg_popcount_avx512_choose_srv.o: CFLAGS+=$(CFLAGS_XSAVE)
-
-# all versions of pg_popcount_avx512.o need CFLAGS_POPCNT
-pg_popcount_avx512.o: CFLAGS+=$(CFLAGS_POPCNT)
-pg_popcount_avx512_shlib.o: CFLAGS+=$(CFLAGS_POPCNT)
-pg_popcount_avx512_srv.o: CFLAGS+=$(CFLAGS_POPCNT)
-
#
# Shared library versions of object files
#
diff --git a/src/port/meson.build b/src/port/meson.build
index 1150966ab71..83a06325209 100644
--- a/src/port/meson.build
+++ b/src/port/meson.build
@@ -7,6 +7,7 @@ pgport_sources = [
'noblock.c',
'path.c',
'pg_bitutils.c',
+ 'pg_popcount_avx512.c',
'pg_strong_random.c',
'pgcheckdir.c',
'pgmkdirp.c',
@@ -84,8 +85,6 @@ replace_funcs_pos = [
['pg_crc32c_sse42', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK', 'crc'],
['pg_crc32c_sse42_choose', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'],
['pg_crc32c_sb8', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'],
- ['pg_popcount_avx512', 'USE_AVX512_POPCNT_WITH_RUNTIME_CHECK', 'popcnt'],
- ['pg_popcount_avx512_choose', 'USE_AVX512_POPCNT_WITH_RUNTIME_CHECK', 'xsave'],
# arm / aarch64
['pg_crc32c_armv8', 'USE_ARMV8_CRC32C'],
@@ -100,8 +99,8 @@ replace_funcs_pos = [
['pg_crc32c_sb8', 'USE_SLICING_BY_8_CRC32C'],
]
-pgport_cflags = {'crc': cflags_crc, 'popcnt': cflags_popcnt, 'xsave': cflags_xsave}
-pgport_sources_cflags = {'crc': [], 'popcnt': [], 'xsave': []}
+pgport_cflags = {'crc': cflags_crc}
+pgport_sources_cflags = {'crc': []}
foreach f : replace_funcs_neg
func = f.get(0)
diff --git a/src/port/pg_popcount_avx512.c b/src/port/pg_popcount_avx512.c
index 9d3149e2d00..b598e865549 100644
--- a/src/port/pg_popcount_avx512.c
+++ b/src/port/pg_popcount_avx512.c
@@ -12,7 +12,17 @@
*/
#include "c.h"
+#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT)
+#include <cpuid.h>
+#endif
+
+#ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
#include <immintrin.h>
+#endif
+
+#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX)
+#include <intrin.h>
+#endif
#include "port/pg_bitutils.h"
@@ -21,12 +31,82 @@
* use AVX-512 intrinsics, but we check it anyway to be sure. We piggy-back on
* the function pointers that are only used when TRY_POPCNT_FAST is set.
*/
-#ifdef TRY_POPCNT_FAST
+#if defined(TRY_POPCNT_FAST) && defined(USE_AVX512_POPCNT_WITH_RUNTIME_CHECK)
+
+/*
+ * Does CPUID say there's support for XSAVE instructions?
+ */
+static inline bool
+xsave_available(void)
+{
+ unsigned int exx[4] = {0, 0, 0, 0};
+
+#if defined(HAVE__GET_CPUID)
+ __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUID)
+ __cpuid(exx, 1);
+#else
+#error cpuid instruction not available
+#endif
+ return (exx[2] & (1 << 27)) != 0; /* osxsave */
+}
+
+/*
+ * Does XGETBV say the ZMM registers are enabled?
+ *
+ * NB: Caller is responsible for verifying that xsave_available() returns true
+ * before calling this.
+ */
+#ifdef HAVE_XSAVE_INTRINSICS
+pg_attribute_target("xsave")
+#endif
+static inline bool
+zmm_regs_available(void)
+{
+#ifdef HAVE_XSAVE_INTRINSICS
+ return (_xgetbv(0) & 0xe6) == 0xe6;
+#else
+ return false;
+#endif
+}
+
+/*
+ * Does CPUID say there's support for AVX-512 popcount and byte-and-word
+ * instructions?
+ */
+static inline bool
+avx512_popcnt_available(void)
+{
+ unsigned int exx[4] = {0, 0, 0, 0};
+
+#if defined(HAVE__GET_CPUID_COUNT)
+ __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUIDEX)
+ __cpuidex(exx, 7, 0);
+#else
+#error cpuid instruction not available
+#endif
+ return (exx[2] & (1 << 14)) != 0 && /* avx512-vpopcntdq */
+ (exx[1] & (1 << 30)) != 0; /* avx512-bw */
+}
+
+/*
+ * Returns true if the CPU supports the instructions required for the AVX-512
+ * pg_popcount() implementation.
+ */
+bool
+pg_popcount_avx512_available(void)
+{
+ return xsave_available() &&
+ zmm_regs_available() &&
+ avx512_popcnt_available();
+}
/*
* pg_popcount_avx512
* Returns the number of 1-bits in buf
*/
+pg_attribute_target("avx512vpopcntdq", "avx512bw")
uint64
pg_popcount_avx512(const char *buf, int bytes)
{
@@ -82,6 +162,7 @@ pg_popcount_avx512(const char *buf, int bytes)
* pg_popcount_masked_avx512
* Returns the number of 1-bits in buf after applying the mask to each byte
*/
+pg_attribute_target("avx512vpopcntdq", "avx512bw")
uint64
pg_popcount_masked_avx512(const char *buf, int bytes, bits8 mask)
{
@@ -138,4 +219,5 @@ pg_popcount_masked_avx512(const char *buf, int bytes, bits8 mask)
return _mm512_reduce_add_epi64(accum);
}
-#endif /* TRY_POPCNT_FAST */
+#endif /* TRY_POPCNT_FAST &&
+ * USE_AVX512_POPCNT_WITH_RUNTIME_CHECK */
diff --git a/src/port/pg_popcount_avx512_choose.c b/src/port/pg_popcount_avx512_choose.c
deleted file mode 100644
index b37107803a8..00000000000
--- a/src/port/pg_popcount_avx512_choose.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * pg_popcount_avx512_choose.c
- * Test whether we can use the AVX-512 pg_popcount() implementation.
- *
- * Copyright (c) 2024, PostgreSQL Global Development Group
- *
- * IDENTIFICATION
- * src/port/pg_popcount_avx512_choose.c
- *
- *-------------------------------------------------------------------------
- */
-#include "c.h"
-
-#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT)
-#include <cpuid.h>
-#endif
-
-#ifdef HAVE_XSAVE_INTRINSICS
-#include <immintrin.h>
-#endif
-
-#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX)
-#include <intrin.h>
-#endif
-
-#include "port/pg_bitutils.h"
-
-/*
- * It's probably unlikely that TRY_POPCNT_FAST won't be set if we are able to
- * use AVX-512 intrinsics, but we check it anyway to be sure. We piggy-back on
- * the function pointers that are only used when TRY_POPCNT_FAST is set.
- */
-#ifdef TRY_POPCNT_FAST
-
-/*
- * Does CPUID say there's support for XSAVE instructions?
- */
-static inline bool
-xsave_available(void)
-{
- unsigned int exx[4] = {0, 0, 0, 0};
-
-#if defined(HAVE__GET_CPUID)
- __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUID)
- __cpuid(exx, 1);
-#else
-#error cpuid instruction not available
-#endif
- return (exx[2] & (1 << 27)) != 0; /* osxsave */
-}
-
-/*
- * Does XGETBV say the ZMM registers are enabled?
- *
- * NB: Caller is responsible for verifying that xsave_available() returns true
- * before calling this.
- */
-static inline bool
-zmm_regs_available(void)
-{
-#ifdef HAVE_XSAVE_INTRINSICS
- return (_xgetbv(0) & 0xe6) == 0xe6;
-#else
- return false;
-#endif
-}
-
-/*
- * Does CPUID say there's support for AVX-512 popcount and byte-and-word
- * instructions?
- */
-static inline bool
-avx512_popcnt_available(void)
-{
- unsigned int exx[4] = {0, 0, 0, 0};
-
-#if defined(HAVE__GET_CPUID_COUNT)
- __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUIDEX)
- __cpuidex(exx, 7, 0);
-#else
-#error cpuid instruction not available
-#endif
- return (exx[2] & (1 << 14)) != 0 && /* avx512-vpopcntdq */
- (exx[1] & (1 << 30)) != 0; /* avx512-bw */
-}
-
-/*
- * Returns true if the CPU supports the instructions required for the AVX-512
- * pg_popcount() implementation.
- */
-bool
-pg_popcount_avx512_available(void)
-{
- return xsave_available() &&
- zmm_regs_available() &&
- avx512_popcnt_available();
-}
-
-#endif /* TRY_POPCNT_FAST */