[Offload][Conformance] Add RandomGenerator for large input spaces (#154252)

leandrolcampos · web-flow · commit 8d7b50e5725e · 2025-08-20T13:37:01.000-05:00
This patch implements the `RandomGenerator`, a new input generator that
enables conformance testing for functions with large input spaces (e.g.,
double-precision math functions).

**Architectural Refactoring**

To support different generation strategies in a clean and extensible
way, the existing `ExhaustiveGenerator` was refactored into a new class
hierarchy:
* A new abstract base class, `RangeBasedGenerator`, was introduced using
the Curiously Recurring Template Pattern (CRTP). It contains the common
logic for generators that operate on a sequence of ranges.
* `ExhaustiveGenerator` now inherits from this base class, simplifying
its implementation.

**New Components**
* The new `RandomGenerator` class also inherits from
`RangeBasedGenerator`. It implements a strategy that randomly samples a
specified number of points from the total input space.
* Random number generation is handled by a new, self-contained
`RandomState` class (a `xorshift64*` PRNG seeded with `splitmix64`) to
ensure deterministic and reproducible random streams for testing.

**Example Usage**

As a demonstration of this new capability, this patch also adds the first
double-precision conformance test for the `log` function. This test uses
the new `RandomGenerator` to validate the implementations from the
`llvm-libm`, `cuda-math`, and `hip-math` providers.
diff --git a/offload/unittests/Conformance/device_code/CUDAMath.cpp b/offload/unittests/Conformance/device_code/CUDAMath.cpp
@@ -119,6 +119,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out,
   runKernelBody<__nv_expm1f>(NumElements, Out, X);
 }
 
+/// Double-precision `log` kernel for the CUDA math provider: forwards the
+/// input buffer `X`, the output buffer `Out`, and the element count to
+/// `runKernelBody` instantiated with `__nv_log`.
+__gpu_kernel void logKernel(const double *X, double *Out,
+                            size_t NumElements) noexcept {
+  runKernelBody<__nv_log>(NumElements, Out, X);
+}
+
 __gpu_kernel void logfKernel(const float *X, float *Out,
                              size_t NumElements) noexcept {
   runKernelBody<__nv_logf>(NumElements, Out, X);
diff --git a/offload/unittests/Conformance/device_code/DeviceAPIs.hpp b/offload/unittests/Conformance/device_code/DeviceAPIs.hpp
@@ -63,6 +63,7 @@ float __nv_expf(float);
 float __nv_exp10f(float);
 float __nv_exp2f(float);
 float __nv_expm1f(float);
+double __nv_log(double);
 float __nv_logf(float);
 float __nv_log10f(float);
 float __nv_log1pf(float);
@@ -96,6 +97,7 @@ float __ocml_exp_f32(float);
 float __ocml_exp10_f32(float);
 float __ocml_exp2_f32(float);
 float __ocml_expm1_f32(float);
+double __ocml_log_f64(double);
 float __ocml_log_f32(float);
 float __ocml_log10_f32(float);
 float __ocml_log1p_f32(float);
diff --git a/offload/unittests/Conformance/device_code/HIPMath.cpp b/offload/unittests/Conformance/device_code/HIPMath.cpp
@@ -119,6 +119,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out,
   runKernelBody<__ocml_expm1_f32>(NumElements, Out, X);
 }
 
+/// Double-precision `log` kernel for the HIP math provider: forwards the
+/// input buffer `X`, the output buffer `Out`, and the element count to
+/// `runKernelBody` instantiated with `__ocml_log_f64`.
+__gpu_kernel void logKernel(const double *X, double *Out,
+                            size_t NumElements) noexcept {
+  runKernelBody<__ocml_log_f64>(NumElements, Out, X);
+}
+
 __gpu_kernel void logfKernel(const float *X, float *Out,
                              size_t NumElements) noexcept {
   runKernelBody<__ocml_log_f32>(NumElements, Out, X);
diff --git a/offload/unittests/Conformance/device_code/LLVMLibm.cpp b/offload/unittests/Conformance/device_code/LLVMLibm.cpp
@@ -123,6 +123,11 @@ __gpu_kernel void hypotf16Kernel(const float16 *X, float16 *Y, float16 *Out,
   runKernelBody<hypotf16>(NumElements, Out, X, Y);
 }
 
+/// Double-precision `log` kernel for the LLVM libm provider: forwards the
+/// input buffer `X`, the output buffer `Out`, and the element count to
+/// `runKernelBody` instantiated with `log`.
+__gpu_kernel void logKernel(const double *X, double *Out,
+                            size_t NumElements) noexcept {
+  runKernelBody<log>(NumElements, Out, X);
+}
+
 __gpu_kernel void logfKernel(const float *X, float *Out,
                              size_t NumElements) noexcept {
   runKernelBody<logf>(NumElements, Out, X);
diff --git a/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp b/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
@@ -8,98 +8,71 @@
 ///
 /// \file
 /// This file contains the definition of the ExhaustiveGenerator class, a
-/// concrete input generator that exhaustively creates inputs from a given
-/// sequence of ranges.
+/// concrete range-based generator that exhaustively creates inputs from a
+/// given sequence of ranges.
 ///
 //===----------------------------------------------------------------------===//
 
 #ifndef MATHTEST_EXHAUSTIVEGENERATOR_HPP
 #define MATHTEST_EXHAUSTIVEGENERATOR_HPP
 
 #include "mathtest/IndexedRange.hpp"
-#include "mathtest/InputGenerator.hpp"
+#include "mathtest/RangeBasedGenerator.hpp"
 
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/Support/Parallel.h"
-
-#include <algorithm>
 #include <array>
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
+#include <optional>
 #include <tuple>
 
 namespace mathtest {
 
 template <typename... InTypes>
 class [[nodiscard]] ExhaustiveGenerator final
-    : public InputGenerator<InTypes...> {
-  static constexpr std::size_t NumInputs = sizeof...(InTypes);
-  static_assert(NumInputs > 0, "The number of inputs must be at least 1");
+    : public RangeBasedGenerator<ExhaustiveGenerator<InTypes...>, InTypes...> {
+
+  friend class RangeBasedGenerator<ExhaustiveGenerator<InTypes...>, InTypes...>;
+
+  using Base = RangeBasedGenerator<ExhaustiveGenerator<InTypes...>, InTypes...>;
+  using IndexArrayType = std::array<uint64_t, Base::NumInputs>;
+
+  using Base::RangesTuple;
+  using Base::Size;
 
 public:
+  /// Builds a generator over every combination of elements of `Ranges`.
+  ///
+  /// The size of the input space is the product of the range sizes; the
+  /// asserts require that this product fits in 64 bits and is non-zero.
+  /// Per-input strides are then precomputed so that a flat index can later be
+  /// decomposed into one index per range.
+  ///
+  /// NOTE(review): the overflow check is an `assert` only, so in a build with
+  /// asserts disabled an overflowing product would reach `*MaybeSize` on an
+  /// empty optional -- confirm callers never pass such ranges.
   explicit constexpr ExhaustiveGenerator(
       const IndexedRange<InTypes> &...Ranges) noexcept
-      : RangesTuple(Ranges...) {
-    bool Overflowed = getSizeWithOverflow(Ranges..., Size);
+      : Base(Ranges...) {
+    const auto MaybeSize = getInputSpaceSize(Ranges...);
+
+    assert(MaybeSize.has_value() && "The size is too large");
+    Size = *MaybeSize;
 
-    assert(!Overflowed && "The input space size is too large");
-    assert((Size > 0) && "The input space size must be at least 1");
+    assert((Size > 0) && "The size must be at least 1");
 
     IndexArrayType DimSizes = {};
     std::size_t DimIndex = 0;
     ((DimSizes[DimIndex++] = Ranges.getSize()), ...);
 
-    Strides[NumInputs - 1] = 1;
-    if constexpr (NumInputs > 1)
-      for (int Index = static_cast<int>(NumInputs) - 2; Index >= 0; --Index)
+    // The last input gets stride 1 (it varies fastest); every earlier input's
+    // stride is the product of the sizes of all inputs that follow it.
+    Strides[Base::NumInputs - 1] = 1;
+    if constexpr (Base::NumInputs > 1)
+      for (int Index = static_cast<int>(Base::NumInputs) - 2; Index >= 0;
+           --Index)
         Strides[Index] = Strides[Index + 1] * DimSizes[Index + 1];
   }
 
-  void reset() noexcept override { NextFlatIndex = 0; }
-
-  [[nodiscard]] std::size_t
-  fill(llvm::MutableArrayRef<InTypes>... Buffers) noexcept override {
-    const std::array<std::size_t, NumInputs> BufferSizes = {Buffers.size()...};
-    const std::size_t BufferSize = BufferSizes[0];
-    assert((BufferSize != 0) && "Buffer size cannot be zero");
-    assert(std::all_of(BufferSizes.begin(), BufferSizes.end(),
-                       [&](std::size_t Size) { return Size == BufferSize; }) &&
-           "All input buffers must have the same size");
-
-    if (NextFlatIndex >= Size)
-      return 0;
-
-    const auto BatchSize = std::min<uint64_t>(BufferSize, Size - NextFlatIndex);
-    const auto CurrentFlatIndex = NextFlatIndex;
-    NextFlatIndex += BatchSize;
-
-    auto BufferPtrsTuple = std::make_tuple(Buffers.data()...);
-
-    llvm::parallelFor(0, BatchSize, [&](std::size_t Offset) {
-      writeInputs(CurrentFlatIndex, Offset, BufferPtrsTuple);
-    });
-
-    return static_cast<std::size_t>(BatchSize);
-  }
-
 private:
-  using RangesTupleType = std::tuple<IndexedRange<InTypes>...>;
-  using IndexArrayType = std::array<uint64_t, NumInputs>;
-
-  static bool getSizeWithOverflow(const IndexedRange<InTypes> &...Ranges,
-                                  uint64_t &Size) noexcept {
-    Size = 1;
-    bool Overflowed = false;
-
-    auto Multiplier = [&](const uint64_t RangeSize) {
-      if (!Overflowed)
-        Overflowed = __builtin_mul_overflow(Size, RangeSize, &Size);
-    };
+  /// Converts `FlatIndex` into one index per input: each component is the
+  /// quotient of the remaining flat index by that input's stride, and the
+  /// remainder is carried forward to the next input.
+  [[nodiscard]] constexpr IndexArrayType
+  getNDIndex(uint64_t FlatIndex) const noexcept {
+    IndexArrayType NDIndex;
 
-    (Multiplier(Ranges.getSize()), ...);
+    for (std::size_t Index = 0; Index < Base::NumInputs; ++Index) {
+      NDIndex[Index] = FlatIndex / Strides[Index];
+      FlatIndex -= NDIndex[Index] * Strides[Index];
+    }
 
-    return Overflowed;
+    return NDIndex;
   }
 
   template <typename BufferPtrsTupleType>
@@ -109,31 +82,37 @@ class [[nodiscard]] ExhaustiveGenerator final
     writeInputsImpl<0>(NDIndex, Offset, BufferPtrsTuple);
   }
 
-  constexpr IndexArrayType getNDIndex(uint64_t FlatIndex) const noexcept {
-    IndexArrayType NDIndex;
-
-    for (std::size_t Index = 0; Index < NumInputs; ++Index) {
-      NDIndex[Index] = FlatIndex / Strides[Index];
-      FlatIndex -= NDIndex[Index] * Strides[Index];
-    }
-
-    return NDIndex;
-  }
-
+  /// Writes the element at position `NDIndex[Index]` of the `Index`-th range
+  /// into the `Index`-th buffer at position `Offset`, then recurses to the
+  /// next input. The recursion ends once `Index` reaches `Base::NumInputs`.
   template <std::size_t Index, typename BufferPtrsTupleType>
   void writeInputsImpl(IndexArrayType NDIndex, uint64_t Offset,
                        BufferPtrsTupleType BufferPtrsTuple) const noexcept {
-    if constexpr (Index < NumInputs) {
+    if constexpr (Index < Base::NumInputs) {
       const auto &Range = std::get<Index>(RangesTuple);
       std::get<Index>(BufferPtrsTuple)[Offset] = Range[NDIndex[Index]];
+
       writeInputsImpl<Index + 1>(NDIndex, Offset, BufferPtrsTuple);
     }
   }
 
-  uint64_t Size = 1;
-  RangesTupleType RangesTuple;
+  /// Computes the size of the input space as the product of all range sizes.
+  ///
+  /// \returns The product, or `std::nullopt` if it does not fit in a
+  /// `uint64_t`.
+  [[nodiscard]] static constexpr std::optional<uint64_t>
+  getInputSpaceSize(const IndexedRange<InTypes> &...Ranges) noexcept {
+    uint64_t InputSpaceSize = 1;
+    bool Overflowed = false;
+
+    auto Multiplier = [&](const uint64_t RangeSize) {
+      // Once an overflow has been seen, stop multiplying so the flag stays
+      // set for the remaining ranges.
+      if (!Overflowed)
+        Overflowed =
+            __builtin_mul_overflow(InputSpaceSize, RangeSize, &InputSpaceSize);
+    };
+
+    (Multiplier(Ranges.getSize()), ...);
+
+    if (Overflowed)
+      return std::nullopt;
+
+    return InputSpaceSize;
+  }
+
   IndexArrayType Strides = {};
-  uint64_t NextFlatIndex = 0;
 };
 } // namespace mathtest
 
diff --git a/offload/unittests/Conformance/include/mathtest/RandomGenerator.hpp b/offload/unittests/Conformance/include/mathtest/RandomGenerator.hpp
@@ -0,0 +1,86 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the definition of the RandomGenerator class, a concrete
+/// range-based generator that randomly creates inputs from a given sequence of
+/// ranges.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef MATHTEST_RANDOMGENERATOR_HPP
+#define MATHTEST_RANDOMGENERATOR_HPP
+
+#include "mathtest/IndexedRange.hpp"
+#include "mathtest/RandomState.hpp"
+#include "mathtest/RangeBasedGenerator.hpp"
+
+#include <cstddef>
+#include <cstdint>
+#include <tuple>
+
+namespace mathtest {
+
+/// Range-based generator that fills each input with the element found at a
+/// pseudo-random index of the corresponding range.
+///
+/// The random stream used for an element is derived only from the base seed
+/// and the values passed to `writeInputs`, so identical arguments always
+/// produce identical inputs.
+template <typename... InTypes>
+class [[nodiscard]] RandomGenerator final
+    : public RangeBasedGenerator<RandomGenerator<InTypes...>, InTypes...> {
+
+  // NOTE(review): presumably needed so the CRTP base can call the private
+  // `writeInputs` below -- the base class is not visible in this hunk.
+  friend class RangeBasedGenerator<RandomGenerator<InTypes...>, InTypes...>;
+
+  using Base = RangeBasedGenerator<RandomGenerator<InTypes...>, InTypes...>;
+
+  using Base::RangesTuple;
+  using Base::Size;
+
+public:
+  /// Creates a generator over `Ranges` seeded with `BaseSeed`.
+  ///
+  /// \param BaseSeed Seed combined with each element's index to derive that
+  ///                 element's random stream.
+  /// \param Size     Forwarded to the base class; presumably the number of
+  ///                 inputs to sample -- confirm against RangeBasedGenerator.
+  /// \param Ranges   One range per input type to draw elements from.
+  explicit constexpr RandomGenerator(
+      SeedTy BaseSeed, uint64_t Size,
+      const IndexedRange<InTypes> &...Ranges) noexcept
+      : Base(Size, Ranges...), BaseSeed(BaseSeed) {}
+
+private:
+  /// Draws an index in `[0, RangeSize)` from `RNG`, using rejection sampling
+  /// so that the final modulo does not favor small indices.
+  ///
+  /// \returns 0 when `RangeSize` is 0.
+  [[nodiscard]] static uint64_t getRandomIndex(RandomState &RNG,
+                                               uint64_t RangeSize) noexcept {
+    if (RangeSize == 0)
+      return 0;
+
+    // `-RangeSize` wraps to 2^64 - RangeSize, so Threshold is
+    // 2^64 mod RangeSize. Rejecting draws below it leaves an accepted
+    // interval whose length is an exact multiple of RangeSize.
+    const uint64_t Threshold = (-RangeSize) % RangeSize;
+
+    uint64_t RandomNumber;
+    do {
+      RandomNumber = RNG.next();
+    } while (RandomNumber < Threshold);
+
+    return RandomNumber % RangeSize;
+  }
+
+  /// Writes one randomly chosen element per input at position `Offset` of
+  /// each buffer.
+  ///
+  /// A fresh `RandomState` is created for every call, seeded with the base
+  /// seed XORed with `CurrentFlatIndex + Offset`, so the values written depend
+  /// only on that sum and the base seed.
+  template <typename BufferPtrsTupleType>
+  void writeInputs(uint64_t CurrentFlatIndex, uint64_t Offset,
+                   BufferPtrsTupleType BufferPtrsTuple) const noexcept {
+
+    RandomState RNG(SeedTy{BaseSeed.Value ^ (CurrentFlatIndex + Offset)});
+    writeInputsImpl<0>(RNG, Offset, BufferPtrsTuple);
+  }
+
+  /// Picks a random element of the `Index`-th range, stores it in the
+  /// `Index`-th buffer at position `Offset`, then recurses to the next input
+  /// with the same `RNG`. The recursion ends once `Index` reaches
+  /// `Base::NumInputs`.
+  template <std::size_t Index, typename BufferPtrsTupleType>
+  void writeInputsImpl(RandomState &RNG, uint64_t Offset,
+                       BufferPtrsTupleType BufferPtrsTuple) const noexcept {
+    if constexpr (Index < Base::NumInputs) {
+      const auto &Range = std::get<Index>(RangesTuple);
+      const auto RandomIndex = getRandomIndex(RNG, Range.getSize());
+      std::get<Index>(BufferPtrsTuple)[Offset] = Range[RandomIndex];
+
+      writeInputsImpl<Index + 1>(RNG, Offset, BufferPtrsTuple);
+    }
+  }
+
+  // Seed from which every per-element random stream is derived.
+  SeedTy BaseSeed;
+};
+} // namespace mathtest
+
+#endif // MATHTEST_RANDOMGENERATOR_HPP
diff --git a/offload/unittests/Conformance/include/mathtest/RandomState.hpp b/offload/unittests/Conformance/include/mathtest/RandomState.hpp
@@ -0,0 +1,53 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the definition of the RandomState class, a fast and
+/// lightweight pseudo-random number generator.
+///
+/// The implementation is based on the xorshift* generator, seeded using the
+/// SplitMix64 generator for robust initialization. For more details on the
+/// algorithm, see: https://en.wikipedia.org/wiki/Xorshift
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef MATHTEST_RANDOMSTATE_HPP
+#define MATHTEST_RANDOMSTATE_HPP
+
+#include <cstdint>
+
+/// Wrapper around a raw 64-bit seed value; it is the argument type of the
+/// `RandomState` constructor.
+struct SeedTy {
+  uint64_t Value;
+};
+
+/// Pseudo-random number generator with a single 64-bit word of state.
+///
+/// The seed is scrambled once by `splitMix64` to form the initial state; each
+/// call to `next` then advances the state with three xor-shift steps and
+/// returns the new state multiplied by a fixed odd constant.
+class [[nodiscard]] RandomState {
+  uint64_t State;
+
+  /// Mixes `X` by adding a constant and applying xor-shift/multiply rounds.
+  ///
+  /// A result of zero is replaced by a non-zero constant, because the
+  /// xor-shift steps in `next` would leave a zero state at zero forever.
+  [[nodiscard]] static constexpr uint64_t splitMix64(uint64_t X) noexcept {
+    X += 0x9E3779B97F4A7C15ULL;
+    X = (X ^ (X >> 30)) * 0xBF58476D1CE4E5B9ULL;
+    X = (X ^ (X >> 27)) * 0x94D049BB133111EBULL;
+    X = (X ^ (X >> 31));
+    return X ? X : 0x9E3779B97F4A7C15ULL;
+  }
+
+public:
+  /// Initializes the state from `Seed`; equal seeds yield equal sequences.
+  explicit constexpr RandomState(SeedTy Seed) noexcept
+      : State(splitMix64(Seed.Value)) {}
+
+  /// Advances the generator and returns the next 64-bit value.
+  inline uint64_t next() noexcept {
+    uint64_t X = State;
+    X ^= X >> 12;
+    X ^= X << 25;
+    X ^= X >> 27;
+    // The stored state is the un-multiplied value; only the returned output
+    // is scrambled by the multiplication.
+    State = X;
+    return X * 0x2545F4914F6CDD1DULL;
+  }
+};
+
+#endif // MATHTEST_RANDOMSTATE_HPP
diff --git a/offload/unittests/Conformance/include/mathtest/RangeBasedGenerator.hpp b/offload/unittests/Conformance/include/mathtest/RangeBasedGenerator.hpp
diff --git a/offload/unittests/Conformance/tests/CMakeLists.txt b/offload/unittests/Conformance/tests/CMakeLists.txt
diff --git a/offload/unittests/Conformance/tests/LogTest.cpp b/offload/unittests/Conformance/tests/LogTest.cpp