diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2025-03-20 16:23:09 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2025-03-20 16:23:09 -0400 |
commit | fdb5dd6331e305f797bb589747f056062c305f0b (patch) | |
tree | 508fae147562ddb2531a651dfd59d35e537939c8 /config | |
parent | 50ba65e73325cf55fedb3e1f14673d816726923b (diff) | |
download | postgresql-fdb5dd6331e305f797bb589747f056062c305f0b.tar.gz postgresql-fdb5dd6331e305f797bb589747f056062c305f0b.zip |
Be more paranoid in configure's checks for CRC and POPCNT intrinsics.
In these tests, we need to verify not only that the compiler has heard
of these intrinsics, but that lower-level tools cope with them too.
(For example, the assembler must also know the instructions, and on
some platforms there might be library support involved.) The hazard
is that the compiler might optimize away the calls altogether,
allowing the configure check to succeed only to have the build fail
later if lower-level support is missing. The existing code tried to
prevent that by ensuring that the result of the intrinsic is used
for something, but that's really insufficient because we were feeding
constant input to it. So the compiler would be perfectly entitled to
optimize away the calls anyway. Fix by making the inputs into global
variables. (Hypothetically, LTO optimization could still remove the
code --- but that's well past where we'd be likely to hit trouble.)
It is not known that any current compiler would actually optimize
away these calls, and even if that happened it would be unlikely
that any problem would manifest. Our concern for this stems from
largely-bygone days when it was common to install gcc on platforms
with some other native compiler, so that a compiler-vs-library
support discrepancy was more probable. Still, there's little
point in defending against such cases in a way that is visibly
incomplete.
I'm content to fix this in master for now; we can back-patch if
any indication appears that it's a live problem for someone.
Discussion: https://postgr.es/m/3368102.1741993462@sss.pgh.pa.us
Diffstat (limited to 'config')
-rw-r--r-- | config/c-compiler.m4 | 28 |
1 files changed, 16 insertions, 12 deletions
diff --git a/config/c-compiler.m4 b/config/c-compiler.m4 index 8534cc54c13..3712e81e38c 100644 --- a/config/c-compiler.m4 +++ b/config/c-compiler.m4 @@ -553,16 +553,20 @@ fi])# PGAC_HAVE_GCC__ATOMIC_INT64_CAS # the other ones are, on x86-64 platforms) # # If the intrinsics are supported, sets pgac_sse42_crc32_intrinsics. +# +# To detect the case where the compiler knows the function but library support +# is missing, we must link not just compile, and store the results in global +# variables so the compiler doesn't optimize away the call. AC_DEFUN([PGAC_SSE42_CRC32_INTRINSICS], [define([Ac_cachevar], [AS_TR_SH([pgac_cv_sse42_crc32_intrinsics])])dnl AC_CACHE_CHECK([for _mm_crc32_u8 and _mm_crc32_u32], [Ac_cachevar], [AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <nmmintrin.h> + unsigned int crc; #if defined(__has_attribute) && __has_attribute (target) __attribute__((target("sse4.2"))) #endif static int crc32_sse42_test(void) { - unsigned int crc = 0; crc = _mm_crc32_u8(crc, 0); crc = _mm_crc32_u32(crc, 0); /* return computed value, to prevent the above being optimized away */ @@ -593,9 +597,9 @@ AC_DEFUN([PGAC_ARMV8_CRC32C_INTRINSICS], AC_CACHE_CHECK([for __crc32cb, __crc32ch, __crc32cw, and __crc32cd with CFLAGS=$1], [Ac_cachevar], [pgac_save_CFLAGS=$CFLAGS CFLAGS="$pgac_save_CFLAGS $1" -AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <arm_acle.h>], - [unsigned int crc = 0; - crc = __crc32cb(crc, 0); +AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <arm_acle.h> +unsigned int crc;], + [crc = __crc32cb(crc, 0); crc = __crc32ch(crc, 0); crc = __crc32cw(crc, 0); crc = __crc32cd(crc, 0); @@ -628,9 +632,8 @@ AC_DEFUN([PGAC_LOONGARCH_CRC32C_INTRINSICS], AC_CACHE_CHECK( [for __builtin_loongarch_crcc_w_b_w, __builtin_loongarch_crcc_w_h_w, __builtin_loongarch_crcc_w_w_w and __builtin_loongarch_crcc_w_d_w], [Ac_cachevar], -[AC_LINK_IFELSE([AC_LANG_PROGRAM([], - [unsigned int crc = 0; - crc = __builtin_loongarch_crcc_w_b_w(0, crc); +[AC_LINK_IFELSE([AC_LANG_PROGRAM([unsigned int crc;], + [crc = __builtin_loongarch_crcc_w_b_w(0, crc); crc = __builtin_loongarch_crcc_w_h_w(0, crc); crc = __builtin_loongarch_crcc_w_w_w(0, crc); crc = __builtin_loongarch_crcc_w_d_w(0, crc); @@ -680,22 +683,23 @@ undefine([Ac_cachevar])dnl AC_DEFUN([PGAC_AVX512_POPCNT_INTRINSICS], [define([Ac_cachevar], [AS_TR_SH([pgac_cv_avx512_popcnt_intrinsics])])dnl AC_CACHE_CHECK([for _mm512_popcnt_epi64], [Ac_cachevar], -[AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <immintrin.h> +[AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <immintrin.h> #include <stdint.h> + char buf[sizeof(__m512i)]; + #if defined(__has_attribute) && __has_attribute (target) __attribute__((target("avx512vpopcntdq,avx512bw"))) #endif static int popcount_test(void) { - const char buf@<:@sizeof(__m512i)@:>@; int64_t popcnt = 0; __m512i accum = _mm512_setzero_si512(); - const __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf); - const __m512i cnt = _mm512_popcnt_epi64(val); + __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf); + __m512i cnt = _mm512_popcnt_epi64(val); accum = _mm512_add_epi64(accum, cnt); popcnt = _mm512_reduce_add_epi64(accum); return (int) popcnt; - }], + }]], [return popcount_test();])], [Ac_cachevar=yes], [Ac_cachevar=no])]) |