Skip to content

Commit 8d7b50e

Browse files
[Offload][Conformance] Add RandomGenerator for large input spaces (#154252)
This patch implements the `RandomGenerator`, a new input generator that enables conformance testing for functions with large input spaces (e.g., double-precision math functions).

**Architectural Refactoring**

To support different generation strategies in a clean and extensible way, the existing `ExhaustiveGenerator` was refactored into a new class hierarchy:

* A new abstract base class, `RangeBasedGenerator`, was introduced using the Curiously Recurring Template Pattern (CRTP). It contains the common logic for generators that operate on a sequence of ranges.
* `ExhaustiveGenerator` now inherits from this base class, simplifying its implementation.

**New Components**

* The new `RandomGenerator` class also inherits from `RangeBasedGenerator`. It implements a strategy that randomly samples a specified number of points from the total input space.
* Random number generation is handled by a new, self-contained `RandomState` class (an `xorshift64*` PRNG seeded with `splitmix64`) to ensure deterministic and reproducible random streams for testing.

**Example Usage**

As a first use case and demonstration of this new capability, this patch also adds the first double-precision conformance test for the `log` function. This test uses the new `RandomGenerator` to validate the implementations from the `llvm-libm`, `cuda-math`, and `hip-math` providers.
1 parent 9888f0c commit 8d7b50e

File tree

10 files changed

+361
-73
lines changed

10 files changed

+361
-73
lines changed

offload/unittests/Conformance/device_code/CUDAMath.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out,
119119
runKernelBody<__nv_expm1f>(NumElements, Out, X);
120120
}
121121

122+
/// Double-precision `log` kernel for the CUDA math provider: forwards the
/// element count, the output buffer, and the input buffer to `runKernelBody`
/// instantiated with `__nv_log`.
__gpu_kernel void logKernel(const double *X, double *Out,
123+
size_t NumElements) noexcept {
124+
runKernelBody<__nv_log>(NumElements, Out, X);
125+
}
126+
122127
__gpu_kernel void logfKernel(const float *X, float *Out,
123128
size_t NumElements) noexcept {
124129
runKernelBody<__nv_logf>(NumElements, Out, X);

offload/unittests/Conformance/device_code/DeviceAPIs.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ float __nv_expf(float);
6363
float __nv_exp10f(float);
6464
float __nv_exp2f(float);
6565
float __nv_expm1f(float);
66+
double __nv_log(double);
6667
float __nv_logf(float);
6768
float __nv_log10f(float);
6869
float __nv_log1pf(float);
@@ -96,6 +97,7 @@ float __ocml_exp_f32(float);
9697
float __ocml_exp10_f32(float);
9798
float __ocml_exp2_f32(float);
9899
float __ocml_expm1_f32(float);
100+
double __ocml_log_f64(double);
99101
float __ocml_log_f32(float);
100102
float __ocml_log10_f32(float);
101103
float __ocml_log1p_f32(float);

offload/unittests/Conformance/device_code/HIPMath.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out,
119119
runKernelBody<__ocml_expm1_f32>(NumElements, Out, X);
120120
}
121121

122+
/// Double-precision `log` kernel for the HIP math provider: forwards the
/// element count, the output buffer, and the input buffer to `runKernelBody`
/// instantiated with `__ocml_log_f64`.
__gpu_kernel void logKernel(const double *X, double *Out,
123+
size_t NumElements) noexcept {
124+
runKernelBody<__ocml_log_f64>(NumElements, Out, X);
125+
}
126+
122127
__gpu_kernel void logfKernel(const float *X, float *Out,
123128
size_t NumElements) noexcept {
124129
runKernelBody<__ocml_log_f32>(NumElements, Out, X);

offload/unittests/Conformance/device_code/LLVMLibm.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,11 @@ __gpu_kernel void hypotf16Kernel(const float16 *X, float16 *Y, float16 *Out,
123123
runKernelBody<hypotf16>(NumElements, Out, X, Y);
124124
}
125125

126+
/// Double-precision `log` kernel for the LLVM libm provider: forwards the
/// element count, the output buffer, and the input buffer to `runKernelBody`
/// instantiated with `log`.
__gpu_kernel void logKernel(const double *X, double *Out,
127+
size_t NumElements) noexcept {
128+
runKernelBody<log>(NumElements, Out, X);
129+
}
130+
126131
__gpu_kernel void logfKernel(const float *X, float *Out,
127132
size_t NumElements) noexcept {
128133
runKernelBody<logf>(NumElements, Out, X);

offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp

Lines changed: 52 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -8,98 +8,71 @@
88
///
99
/// \file
1010
/// This file contains the definition of the ExhaustiveGenerator class, a
11-
/// concrete input generator that exhaustively creates inputs from a given
12-
/// sequence of ranges.
11+
/// concrete range-based generator that exhaustively creates inputs from a
12+
/// given sequence of ranges.
1313
///
1414
//===----------------------------------------------------------------------===//
1515

1616
#ifndef MATHTEST_EXHAUSTIVEGENERATOR_HPP
1717
#define MATHTEST_EXHAUSTIVEGENERATOR_HPP
1818

1919
#include "mathtest/IndexedRange.hpp"
20-
#include "mathtest/InputGenerator.hpp"
20+
#include "mathtest/RangeBasedGenerator.hpp"
2121

22-
#include "llvm/ADT/ArrayRef.h"
23-
#include "llvm/Support/Parallel.h"
24-
25-
#include <algorithm>
2622
#include <array>
2723
#include <cassert>
2824
#include <cstddef>
2925
#include <cstdint>
26+
#include <optional>
3027
#include <tuple>
3128

3229
namespace mathtest {
3330

3431
template <typename... InTypes>
3532
class [[nodiscard]] ExhaustiveGenerator final
36-
: public InputGenerator<InTypes...> {
37-
static constexpr std::size_t NumInputs = sizeof...(InTypes);
38-
static_assert(NumInputs > 0, "The number of inputs must be at least 1");
33+
: public RangeBasedGenerator<ExhaustiveGenerator<InTypes...>, InTypes...> {
34+
35+
friend class RangeBasedGenerator<ExhaustiveGenerator<InTypes...>, InTypes...>;
36+
37+
using Base = RangeBasedGenerator<ExhaustiveGenerator<InTypes...>, InTypes...>;
38+
using IndexArrayType = std::array<uint64_t, Base::NumInputs>;
39+
40+
using Base::RangesTuple;
41+
using Base::Size;
3942

4043
public:
4144
explicit constexpr ExhaustiveGenerator(
4245
const IndexedRange<InTypes> &...Ranges) noexcept
43-
: RangesTuple(Ranges...) {
44-
bool Overflowed = getSizeWithOverflow(Ranges..., Size);
46+
: Base(Ranges...) {
47+
const auto MaybeSize = getInputSpaceSize(Ranges...);
48+
49+
assert(MaybeSize.has_value() && "The size is too large");
50+
Size = *MaybeSize;
4551

46-
assert(!Overflowed && "The input space size is too large");
47-
assert((Size > 0) && "The input space size must be at least 1");
52+
assert((Size > 0) && "The size must be at least 1");
4853

4954
IndexArrayType DimSizes = {};
5055
std::size_t DimIndex = 0;
5156
((DimSizes[DimIndex++] = Ranges.getSize()), ...);
5257

53-
Strides[NumInputs - 1] = 1;
54-
if constexpr (NumInputs > 1)
55-
for (int Index = static_cast<int>(NumInputs) - 2; Index >= 0; --Index)
58+
Strides[Base::NumInputs - 1] = 1;
59+
if constexpr (Base::NumInputs > 1)
60+
for (int Index = static_cast<int>(Base::NumInputs) - 2; Index >= 0;
61+
--Index)
5662
Strides[Index] = Strides[Index + 1] * DimSizes[Index + 1];
5763
}
5864

59-
void reset() noexcept override { NextFlatIndex = 0; }
60-
61-
[[nodiscard]] std::size_t
62-
fill(llvm::MutableArrayRef<InTypes>... Buffers) noexcept override {
63-
const std::array<std::size_t, NumInputs> BufferSizes = {Buffers.size()...};
64-
const std::size_t BufferSize = BufferSizes[0];
65-
assert((BufferSize != 0) && "Buffer size cannot be zero");
66-
assert(std::all_of(BufferSizes.begin(), BufferSizes.end(),
67-
[&](std::size_t Size) { return Size == BufferSize; }) &&
68-
"All input buffers must have the same size");
69-
70-
if (NextFlatIndex >= Size)
71-
return 0;
72-
73-
const auto BatchSize = std::min<uint64_t>(BufferSize, Size - NextFlatIndex);
74-
const auto CurrentFlatIndex = NextFlatIndex;
75-
NextFlatIndex += BatchSize;
76-
77-
auto BufferPtrsTuple = std::make_tuple(Buffers.data()...);
78-
79-
llvm::parallelFor(0, BatchSize, [&](std::size_t Offset) {
80-
writeInputs(CurrentFlatIndex, Offset, BufferPtrsTuple);
81-
});
82-
83-
return static_cast<std::size_t>(BatchSize);
84-
}
85-
8665
private:
87-
using RangesTupleType = std::tuple<IndexedRange<InTypes>...>;
88-
using IndexArrayType = std::array<uint64_t, NumInputs>;
89-
90-
static bool getSizeWithOverflow(const IndexedRange<InTypes> &...Ranges,
91-
uint64_t &Size) noexcept {
92-
Size = 1;
93-
bool Overflowed = false;
94-
95-
auto Multiplier = [&](const uint64_t RangeSize) {
96-
if (!Overflowed)
97-
Overflowed = __builtin_mul_overflow(Size, RangeSize, &Size);
98-
};
66+
[[nodiscard]] constexpr IndexArrayType
67+
getNDIndex(uint64_t FlatIndex) const noexcept {
68+
IndexArrayType NDIndex;
9969

100-
(Multiplier(Ranges.getSize()), ...);
70+
for (std::size_t Index = 0; Index < Base::NumInputs; ++Index) {
71+
NDIndex[Index] = FlatIndex / Strides[Index];
72+
FlatIndex -= NDIndex[Index] * Strides[Index];
73+
}
10174

102-
return Overflowed;
75+
return NDIndex;
10376
}
10477

10578
template <typename BufferPtrsTupleType>
@@ -109,31 +82,37 @@ class [[nodiscard]] ExhaustiveGenerator final
10982
writeInputsImpl<0>(NDIndex, Offset, BufferPtrsTuple);
11083
}
11184

112-
constexpr IndexArrayType getNDIndex(uint64_t FlatIndex) const noexcept {
113-
IndexArrayType NDIndex;
114-
115-
for (std::size_t Index = 0; Index < NumInputs; ++Index) {
116-
NDIndex[Index] = FlatIndex / Strides[Index];
117-
FlatIndex -= NDIndex[Index] * Strides[Index];
118-
}
119-
120-
return NDIndex;
121-
}
122-
12385
template <std::size_t Index, typename BufferPtrsTupleType>
12486
void writeInputsImpl(IndexArrayType NDIndex, uint64_t Offset,
12587
BufferPtrsTupleType BufferPtrsTuple) const noexcept {
126-
if constexpr (Index < NumInputs) {
88+
if constexpr (Index < Base::NumInputs) {
12789
const auto &Range = std::get<Index>(RangesTuple);
12890
std::get<Index>(BufferPtrsTuple)[Offset] = Range[NDIndex[Index]];
91+
12992
writeInputsImpl<Index + 1>(NDIndex, Offset, BufferPtrsTuple);
13093
}
13194
}
13295

133-
uint64_t Size = 1;
134-
RangesTupleType RangesTuple;
96+
[[nodiscard]] static constexpr std::optional<uint64_t>
97+
getInputSpaceSize(const IndexedRange<InTypes> &...Ranges) noexcept {
98+
uint64_t InputSpaceSize = 1;
99+
bool Overflowed = false;
100+
101+
auto Multiplier = [&](const uint64_t RangeSize) {
102+
if (!Overflowed)
103+
Overflowed =
104+
__builtin_mul_overflow(InputSpaceSize, RangeSize, &InputSpaceSize);
105+
};
106+
107+
(Multiplier(Ranges.getSize()), ...);
108+
109+
if (Overflowed)
110+
return std::nullopt;
111+
112+
return InputSpaceSize;
113+
}
114+
135115
IndexArrayType Strides = {};
136-
uint64_t NextFlatIndex = 0;
137116
};
138117
} // namespace mathtest
139118

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
///
9+
/// \file
10+
/// This file contains the definition of the RandomGenerator class, a concrete
11+
/// range-based generator that randomly creates inputs from a given sequence of
12+
/// ranges.
13+
///
14+
//===----------------------------------------------------------------------===//
15+
16+
#ifndef MATHTEST_RANDOMGENERATOR_HPP
17+
#define MATHTEST_RANDOMGENERATOR_HPP
18+
19+
#include "mathtest/IndexedRange.hpp"
20+
#include "mathtest/RandomState.hpp"
21+
#include "mathtest/RangeBasedGenerator.hpp"
22+
23+
#include <cstddef>
24+
#include <cstdint>
25+
#include <tuple>
26+
27+
namespace mathtest {
28+
29+
template <typename... InTypes>
30+
class [[nodiscard]] RandomGenerator final
31+
: public RangeBasedGenerator<RandomGenerator<InTypes...>, InTypes...> {
32+
33+
friend class RangeBasedGenerator<RandomGenerator<InTypes...>, InTypes...>;
34+
35+
using Base = RangeBasedGenerator<RandomGenerator<InTypes...>, InTypes...>;
36+
37+
using Base::RangesTuple;
38+
using Base::Size;
39+
40+
public:
41+
explicit constexpr RandomGenerator(
42+
SeedTy BaseSeed, uint64_t Size,
43+
const IndexedRange<InTypes> &...Ranges) noexcept
44+
: Base(Size, Ranges...), BaseSeed(BaseSeed) {}
45+
46+
private:
47+
[[nodiscard]] static uint64_t getRandomIndex(RandomState &RNG,
48+
uint64_t RangeSize) noexcept {
49+
if (RangeSize == 0)
50+
return 0;
51+
52+
const uint64_t Threshold = (-RangeSize) % RangeSize;
53+
54+
uint64_t RandomNumber;
55+
do {
56+
RandomNumber = RNG.next();
57+
} while (RandomNumber < Threshold);
58+
59+
return RandomNumber % RangeSize;
60+
}
61+
62+
template <typename BufferPtrsTupleType>
63+
void writeInputs(uint64_t CurrentFlatIndex, uint64_t Offset,
64+
BufferPtrsTupleType BufferPtrsTuple) const noexcept {
65+
66+
RandomState RNG(SeedTy{BaseSeed.Value ^ (CurrentFlatIndex + Offset)});
67+
writeInputsImpl<0>(RNG, Offset, BufferPtrsTuple);
68+
}
69+
70+
template <std::size_t Index, typename BufferPtrsTupleType>
71+
void writeInputsImpl(RandomState &RNG, uint64_t Offset,
72+
BufferPtrsTupleType BufferPtrsTuple) const noexcept {
73+
if constexpr (Index < Base::NumInputs) {
74+
const auto &Range = std::get<Index>(RangesTuple);
75+
const auto RandomIndex = getRandomIndex(RNG, Range.getSize());
76+
std::get<Index>(BufferPtrsTuple)[Offset] = Range[RandomIndex];
77+
78+
writeInputsImpl<Index + 1>(RNG, Offset, BufferPtrsTuple);
79+
}
80+
}
81+
82+
SeedTy BaseSeed;
83+
};
84+
} // namespace mathtest
85+
86+
#endif // MATHTEST_RANDOMGENERATOR_HPP
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
///
9+
/// \file
10+
/// This file contains the definition of the RandomState class, a fast and
11+
/// lightweight pseudo-random number generator.
12+
///
13+
/// The implementation is based on the xorshift* generator, seeded using the
14+
/// SplitMix64 generator for robust initialization. For more details on the
15+
/// algorithm, see: https://en.wikipedia.org/wiki/Xorshift
16+
///
17+
//===----------------------------------------------------------------------===//
18+
19+
#ifndef MATHTEST_RANDOMSTATE_HPP
20+
#define MATHTEST_RANDOMSTATE_HPP
21+
22+
#include <cstdint>
23+
24+
/// Strongly typed wrapper around the 64-bit value used to seed a
/// `RandomState`.
// NOTE(review): `SeedTy` and `RandomState` are declared at global scope in
// this header — confirm whether they are meant to live in a namespace.
struct SeedTy {
  uint64_t Value;
};

/// Small deterministic pseudo-random number generator.
///
/// The state is advanced with an xorshift step (shifts 12, 25, 27) and each
/// output is the new state multiplied by a fixed odd constant (xorshift64*).
/// The initial state is obtained by passing the seed through one SplitMix64
/// mixing step.
class [[nodiscard]] RandomState {
  uint64_t State;

  /// Applies one SplitMix64 mixing step to \p X.
  ///
  /// A result of zero is replaced by a non-zero constant, because a zero
  /// state would make the xorshift step in `next()` produce zero forever.
  [[nodiscard]] static constexpr uint64_t splitMix64(uint64_t X) noexcept {
    X += 0x9E3779B97F4A7C15ULL;
    X = (X ^ (X >> 30)) * 0xBF58476D1CE4E5B9ULL;
    X = (X ^ (X >> 27)) * 0x94D049BB133111EBULL;
    X = (X ^ (X >> 31));
    return X ? X : 0x9E3779B97F4A7C15ULL;
  }

public:
  /// Initializes the state from \p Seed; the resulting state is never zero.
  explicit constexpr RandomState(SeedTy Seed) noexcept
      : State(splitMix64(Seed.Value)) {}

  /// Advances the generator and returns the next 64-bit value.
  ///
  /// Usable in constant expressions, matching the `constexpr` constructor.
  [[nodiscard]] constexpr uint64_t next() noexcept {
    uint64_t X = State;
    X ^= X >> 12;
    X ^= X << 25;
    X ^= X >> 27;
    State = X;
    return X * 0x2545F4914F6CDD1DULL;
  }
};
52+
53+
#endif // MATHTEST_RANDOMSTATE_HPP

0 commit comments

Comments
 (0)