aboutsummaryrefslogtreecommitdiff
path: root/src/include
diff options
context:
space:
mode:
Diffstat (limited to 'src/include')
-rw-r--r--src/include/port/pg_lfind.h2
-rw-r--r--src/include/port/simd.h22
2 files changed, 23 insertions, 1 deletions
diff --git a/src/include/port/pg_lfind.h b/src/include/port/pg_lfind.h
index d575e733d34..0625cac6b59 100644
--- a/src/include/port/pg_lfind.h
+++ b/src/include/port/pg_lfind.h
@@ -151,7 +151,7 @@ pg_lfind32(uint32 key, uint32 *base, uint32 nelem)
result = vector32_or(tmp1, tmp2);
/* see if there was a match */
- if (vector8_is_highbit_set((Vector8) result))
+ if (vector32_is_highbit_set(result))
{
Assert(assert_result == true);
return true;
diff --git a/src/include/port/simd.h b/src/include/port/simd.h
index b538ac070f7..0ff1549083a 100644
--- a/src/include/port/simd.h
+++ b/src/include/port/simd.h
@@ -275,6 +275,28 @@ vector8_is_highbit_set(const Vector8 v)
}
/*
+ * Exactly like vector8_is_highbit_set except for the input type, so it
+ * looks at each byte separately.
+ *
+ * XXX x86 uses the same underlying type for 8-bit, 16-bit, and 32-bit
+ * integer elements, but Arm does not, hence the need for a separate
+ * function. We could instead adopt the behavior of Arm's vmaxvq_u32(), i.e.
+ * check each 32-bit element, but that would require an additional mask
+ * operation on x86.
+ */
+#ifndef USE_NO_SIMD
+static inline bool
+vector32_is_highbit_set(const Vector32 v)
+{
+#if defined(USE_NEON)
+ return vector8_is_highbit_set((Vector8) v); /* cast: Vector32 is a distinct type from Vector8 on Arm */
+#else /* ! USE_NEON */
+ return vector8_is_highbit_set(v); /* no cast: v is passed straight through as a Vector8 */
+#endif /* USE_NEON */
+}
+#endif /* ! USE_NO_SIMD */
+
+/*
* Return the bitwise OR of the inputs
*/
static inline Vector8