Align benchmark::State to a cacheline. (#1230)

* Align benchmark::State to a cacheline. This can avoid interference with neighboring objects and stabilize benchmark results. * Separate cacheline definition from alignment attribute macro. Co-authored-by: Roman Lebedev <lebedev.ri@gmail.com> --------- Co-authored-by: dominic <510002+dmah42@users.noreply.github.com> Co-authored-by: Roman Lebedev <lebedev.ri@gmail.com>
author: Chris Kennelly <ckennelly@google.com> 2024-08-16 11:10:18 -0400
committer: GitHub <noreply@github.com> 2024-08-16 16:10:18 +0100
commit: 6126d2a2052bb48d3472ac0468ade50397d393c5 (patch)
tree: 8a6195c703f146b23b87f3ef4ff68caa7824c12f
parent: 437fea4b549a449ac319618552981cb328f1aaf4 (diff)
download: google-benchmark-6126d2a2052bb48d3472ac0468ade50397d393c5.tar.gz
google-benchmark-6126d2a2052bb48d3472ac0468ade50397d393c5.zip
1 files changed, 41 insertions, 2 deletions
diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h
index 4cdb451..66f3486 100644
--- a/include/benchmark/benchmark.h
+++ b/include/benchmark/benchmark.h
@@ -290,11 +290,50 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
 #define BENCHMARK_OVERRIDE
 #endif
 
+#if defined(__GNUC__)
+// Determine the cacheline size based on architecture
+#if defined(__i386__) || defined(__x86_64__)
+#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
+#elif defined(__powerpc64__)
+#define BENCHMARK_INTERNAL_CACHELINE_SIZE 128
+#elif defined(__aarch64__)
+#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
+#elif defined(__arm__)
+// Cache line sizes for ARM: These values are not strictly correct since
+// cache line sizes depend on implementations, not architectures.  There
+// are even implementations with cache line sizes configurable at boot
+// time.
+#if defined(__ARM_ARCH_5T__)
+#define BENCHMARK_INTERNAL_CACHELINE_SIZE 32
+#elif defined(__ARM_ARCH_7A__)
+#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
+#endif  // ARM_ARCH
+#endif  // arches
+#endif  // __GNUC__
+
+#ifndef BENCHMARK_INTERNAL_CACHELINE_SIZE
+// A reasonable default guess.  Note that overestimates tend to waste more
+// space, while underestimates tend to waste more time.
+#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
+#endif
+
+#if defined(__GNUC__)
+// Indicates that the declared object should be cache-line aligned using
+// `BENCHMARK_INTERNAL_CACHELINE_SIZE` (see above).
+#define BENCHMARK_INTERNAL_CACHELINE_ALIGNED \
+  __attribute__((aligned(BENCHMARK_INTERNAL_CACHELINE_SIZE)))
+#elif defined(_MSC_VER)
+#define BENCHMARK_INTERNAL_CACHELINE_ALIGNED \
+  __declspec(align(BENCHMARK_INTERNAL_CACHELINE_SIZE))
+#else
+#define BENCHMARK_INTERNAL_CACHELINE_ALIGNED
+#endif
+
 #if defined(_MSC_VER)
 #pragma warning(push)
 // C4251: <symbol> needs to have dll-interface to be used by clients of class
 #pragma warning(disable : 4251)
-#endif
+#endif  // _MSC_VER
 
 namespace benchmark {
 class BenchmarkReporter;
@@ -759,7 +798,7 @@ enum Skipped
 
 // State is passed to a running Benchmark and contains state for the
 // benchmark to use.
-class BENCHMARK_EXPORT State {
+class BENCHMARK_EXPORT BENCHMARK_INTERNAL_CACHELINE_ALIGNED State {
  public:
   struct StateIterator;
   friend struct StateIterator;
author	Chris Kennelly <ckennelly@google.com>	2024-08-16 11:10:18 -0400
committer	GitHub <noreply@github.com>	2024-08-16 16:10:18 +0100
commit	6126d2a2052bb48d3472ac0468ade50397d393c5 (patch)
tree	8a6195c703f146b23b87f3ef4ff68caa7824c12f
parent	437fea4b549a449ac319618552981cb328f1aaf4 (diff)
download	google-benchmark-6126d2a2052bb48d3472ac0468ade50397d393c5.tar.gz google-benchmark-6126d2a2052bb48d3472ac0468ade50397d393c5.zip