llvm · jhuber6 · Aug 16, 2025 · Aug 15, 2025 · Aug 15, 2025 · Aug 15, 2025
diff --git a/libc/benchmarks/gpu/BenchmarkLogger.cpp b/libc/benchmarks/gpu/BenchmarkLogger.cpp
diff --git a/libc/benchmarks/gpu/BenchmarkLogger.h b/libc/benchmarks/gpu/BenchmarkLogger.h
diff --git a/libc/benchmarks/gpu/CMakeLists.txt b/libc/benchmarks/gpu/CMakeLists.txt
@@ -38,31 +38,25 @@ add_unittest_framework_library(
   SRCS
     LibcGpuBenchmark.cpp
     LibcGpuBenchmarkMain.cpp
-    BenchmarkLogger.cpp
   HDRS
     LibcGpuBenchmark.h
-    BenchmarkLogger.h
   DEPENDS
+    libc.benchmarks.gpu.timing.timing
     libc.hdr.stdint_proxy
-    libc.src.__support.big_int
-    libc.src.__support.c_string
     libc.src.__support.CPP.string
     libc.src.__support.CPP.string_view
     libc.src.__support.CPP.type_traits
-    libc.src.__support.CPP.limits
     libc.src.__support.CPP.algorithm
     libc.src.__support.CPP.atomic
     libc.src.__support.CPP.array
-    libc.src.__support.fixed_point.fx_rep
-    libc.src.__support.macros.properties.types
-    libc.src.__support.OSUtil.osutil
-    libc.src.__support.uint128
     libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.nearest_integer_operations
     libc.src.__support.FPUtil.sqrt
     libc.src.__support.fixedvector
-    libc.src.time.clock
-    libc.benchmarks.gpu.timing.timing
+    libc.src.__support.GPU.utils
+    libc.src.__support.time.gpu.time_utils
     libc.src.stdio.printf
+    libc.src.time.clock
 )
 
 add_subdirectory(src)
diff --git a/libc/benchmarks/gpu/LibcGpuBenchmark.cpp b/libc/benchmarks/gpu/LibcGpuBenchmark.cpp
@@ -2,16 +2,17 @@
 
 #include "hdr/stdint_proxy.h"
 #include "src/__support/CPP/algorithm.h"
-#include "src/__support/CPP/array.h"
 #include "src/__support/CPP/atomic.h"
 #include "src/__support/CPP/string.h"
 #include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/NearestIntegerOperations.h"
 #include "src/__support/FPUtil/sqrt.h"
 #include "src/__support/GPU/utils.h"
 #include "src/__support/fixedvector.h"
 #include "src/__support/macros/config.h"
 #include "src/__support/time/gpu/time_utils.h"
 #include "src/stdio/printf.h"
+#include "src/time/clock.h"
 
 namespace LIBC_NAMESPACE_DECL {
 namespace benchmarks {
@@ -134,11 +135,13 @@ void print_results(Benchmark *b) {
   cpp::atomic_thread_fence(cpp::MemoryOrder::RELEASE);
 
   LIBC_NAMESPACE::printf(
-      "%-24s |%15.0f |%9.0f |%8llu |%8llu |%11llu |%9u |\n",
+      "%-24s |%15.0f |%9.0f |%8llu |%8llu |%15llu |%9u |\n",
       b->get_test_name().data(), final_result.cycles,
-      final_result.standard_deviation, (unsigned long long)final_result.min,
-      (unsigned long long)final_result.max,
-      (unsigned long long)final_result.total_iterations, (unsigned)num_threads);
+      final_result.standard_deviation,
+      static_cast<unsigned long long>(final_result.min),
+      static_cast<unsigned long long>(final_result.max),
+      static_cast<unsigned long long>(final_result.total_iterations),
+      static_cast<unsigned>(num_threads));
 }
 
 void print_header() {
@@ -147,7 +150,7 @@ void print_header() {
                          benchmarks[0]->get_suite_name().data());
   LIBC_NAMESPACE::printf("%s", RESET);
   cpp::string titles = "Benchmark                |  Cycles (Mean) |   Stddev | "
-                       "    Min |     Max | Iterations |  Threads |\n";
+                       "    Min |     Max |     Iterations |  Threads |\n";
   LIBC_NAMESPACE::printf(titles.data());
 
   cpp::string separator(titles.size(), '-');
@@ -226,7 +229,8 @@ BenchmarkResult benchmark(const BenchmarkOptions &options,
         change_ratio < options.epsilon)
       break;
 
-    iterations = static_cast<uint32_t>(iterations * options.scaling_factor);
+    iterations = static_cast<uint32_t>(
+        fputil::ceil(iterations * options.scaling_factor));
   }
 
   const auto &estimator = rep.get_estimator();

diff --git a/libc/benchmarks/gpu/LibcGpuBenchmark.h b/libc/benchmarks/gpu/LibcGpuBenchmark.h
@@ -1,18 +1,16 @@
 #ifndef LLVM_LIBC_BENCHMARKS_LIBC_GPU_BENCHMARK_H
 #define LLVM_LIBC_BENCHMARKS_LIBC_GPU_BENCHMARK_H
 
-#include "benchmarks/gpu/BenchmarkLogger.h"
 #include "benchmarks/gpu/timing/timing.h"
+
 #include "hdr/stdint_proxy.h"
 #include "src/__support/CPP/algorithm.h"
 #include "src/__support/CPP/array.h"
-#include "src/__support/CPP/limits.h"
 #include "src/__support/CPP/string_view.h"
 #include "src/__support/CPP/type_traits.h"
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/FPUtil/sqrt.h"
 #include "src/__support/macros/config.h"
-#include "src/time/clock.h"
 
 namespace LIBC_NAMESPACE_DECL {
 

diff --git a/libc/benchmarks/gpu/timing/amdgpu/CMakeLists.txt b/libc/benchmarks/gpu/timing/amdgpu/CMakeLists.txt
@@ -4,10 +4,11 @@ add_header_library(
     timing.h
   DEPENDS
     libc.hdr.stdint_proxy
-    libc.src.__support.common
     libc.src.__support.macros.config
     libc.src.__support.macros.attributes
     libc.src.__support.CPP.algorithm
     libc.src.__support.CPP.array
+    libc.src.__support.CPP.atomic
     libc.src.__support.CPP.type_traits
+    libc.src.__support.GPU.utils
 )
diff --git a/libc/benchmarks/gpu/timing/amdgpu/timing.h b/libc/benchmarks/gpu/timing/amdgpu/timing.h
@@ -15,7 +15,6 @@
 #include "src/__support/CPP/atomic.h"
 #include "src/__support/CPP/type_traits.h"
 #include "src/__support/GPU/utils.h"
-#include "src/__support/common.h"
 #include "src/__support/macros/attributes.h"
 #include "src/__support/macros/config.h"
 

diff --git a/libc/benchmarks/gpu/timing/nvptx/CMakeLists.txt b/libc/benchmarks/gpu/timing/nvptx/CMakeLists.txt
@@ -4,10 +4,11 @@ add_header_library(
     timing.h
   DEPENDS
     libc.hdr.stdint_proxy
-    libc.src.__support.common
     libc.src.__support.macros.config
     libc.src.__support.macros.attributes
     libc.src.__support.CPP.algorithm
     libc.src.__support.CPP.array
+    libc.src.__support.CPP.atomic
     libc.src.__support.CPP.type_traits
+    libc.src.__support.GPU.utils
 )
diff --git a/libc/benchmarks/gpu/timing/nvptx/timing.h b/libc/benchmarks/gpu/timing/nvptx/timing.h
@@ -13,9 +13,7 @@
 #include "src/__support/CPP/algorithm.h"
 #include "src/__support/CPP/array.h"
 #include "src/__support/CPP/atomic.h"
-#include "src/__support/CPP/type_traits.h"
 #include "src/__support/GPU/utils.h"
-#include "src/__support/common.h"
 #include "src/__support/macros/attributes.h"
 #include "src/__support/macros/config.h"
 
@@ -66,7 +64,7 @@ template <typename F, typename T>
   uint64_t stop = gpu::processor_clock();
   cpp::atomic_thread_fence(cpp::MemoryOrder::ACQ_REL);
   asm("" ::"r"(stop));
-  volatile T output = result;
+  volatile auto output = result;
 
   // Return the time elapsed.
   return stop - start;