Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
a45b73a
[Offload] Add framework for math conformance tests
leandrolcampos Jul 17, 2025
db0e544
Add trailing newline
leandrolcampos Jul 17, 2025
dd563e5
Pass correct GPU architecture to device compiler
leandrolcampos Jul 17, 2025
3931dd4
Annotate `#endif` directives for clarity
leandrolcampos Jul 17, 2025
e711cc9
Rename aliases for consistency
leandrolcampos Jul 18, 2025
ec228fd
Replace `#pragma once` with standard include guards
leandrolcampos Jul 18, 2025
bfed21b
Omit braces for simple single-statement blocks
leandrolcampos Jul 19, 2025
80b7d61
Align AMDGPU device compilation flags with NVPTX
leandrolcampos Jul 20, 2025
267973b
Add platform-aware constructor to `DeviceContext`
leandrolcampos Jul 20, 2025
8746d14
Generate tests based on `Provider` and `Platform`
leandrolcampos Jul 20, 2025
b79c984
Remove conditional compilation guards for `_Float16`
leandrolcampos Jul 21, 2025
5fb9cc3
Simplify implementation using libc's FPBits utility
leandrolcampos Jul 23, 2025
c337516
Add file headers
leandrolcampos Jul 23, 2025
3eb9dd4
Refine wording in Numerics.hpp file header
leandrolcampos Jul 23, 2025
d577f46
Simplify implementation of `ExhaustiveGenerator` for single-threaded use
leandrolcampos Jul 24, 2025
070d4dd
Add file header
leandrolcampos Jul 24, 2025
bde741a
Improve test report and API safety in GpuMathTest
leandrolcampos Jul 24, 2025
e935a32
Move some device compile flags to the call site
leandrolcampos Jul 24, 2025
a1acadf
Add non-fatal tryLoadBinary and tryGetKernel methods
leandrolcampos Jul 25, 2025
ebb67f4
Remove the Dim class
leandrolcampos Jul 25, 2025
c85da4c
Add support for the `--test-configs` command-line option
leandrolcampos Jul 28, 2025
4b273cd
Adapt olLaunchKernel call to a change in the Offload API
leandrolcampos Jul 29, 2025
a3a3d0d
Simplify config pair parser using `StringRef::split`
leandrolcampos Jul 29, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions offload/unittests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ function(add_offload_test_device_code test_filename test_name)
COMMAND ${CMAKE_C_COMPILER}
--target=nvptx64-nvidia-cuda -march=${nvptx_arch}
-nogpulib --cuda-path=${CUDA_ROOT} -flto ${ARGN}
-c ${SRC_PATH} -o ${output_file}
${SRC_PATH} -o ${output_file}
DEPENDS ${SRC_PATH}
)
add_custom_target(${test_name}.nvptx64 DEPENDS ${output_file})
Expand All @@ -64,7 +64,7 @@ function(add_offload_test_device_code test_filename test_name)
OUTPUT ${output_file}
COMMAND ${CMAKE_C_COMPILER}
--target=amdgcn-amd-amdhsa -mcpu=${amdgpu_arch}
-nogpulib -flto ${ARGN} -c ${SRC_PATH} -o ${output_file}
-nogpulib -flto ${ARGN} ${SRC_PATH} -o ${output_file}
DEPENDS ${SRC_PATH}
)
add_custom_target(${test_name}.amdgpu DEPENDS ${output_file})
Expand Down Expand Up @@ -106,16 +106,15 @@ function(add_conformance_test test_name)
endif()

add_executable(${target_name} ${files})
add_dependencies(${target_name} ${PLUGINS_TEST_COMMON} ${test_name}.bin)
target_compile_definitions(${target_name} PRIVATE DEVICE_CODE_PATH="${CONFORMANCE_TEST_DEVICE_CODE_PATH}")
add_dependencies(${target_name} conformance_device_binaries)
target_compile_definitions(${target_name}
PRIVATE DEVICE_BINARY_DIR="${OFFLOAD_CONFORMANCE_DEVICE_BINARY_DIR}")
target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON} libc)
target_include_directories(${target_name} PRIVATE ${PLUGINS_TEST_INCLUDE})
set_target_properties(${target_name} PROPERTIES EXCLUDE_FROM_ALL TRUE)

add_custom_target(offload.conformance.${test_name}
COMMAND $<TARGET_FILE:${target_name}>
DEPENDS ${target_name}
COMMENT "Running conformance test ${test_name}")
DEPENDS ${target_name})
add_dependencies(offload.conformance offload.conformance.${test_name})
endfunction()

Expand Down
7 changes: 3 additions & 4 deletions offload/unittests/Conformance/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
add_custom_target(offload.conformance)

set(PLUGINS_TEST_COMMON LLVMOffload LLVMSupport)
set(PLUGINS_TEST_INCLUDE ${LIBOMPTARGET_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/common)
set(PLUGINS_TEST_COMMON MathTest)

add_subdirectory(device_code)

add_conformance_test(sin sin.cpp)
add_subdirectory(lib)
add_subdirectory(tests)
6 changes: 3 additions & 3 deletions offload/unittests/Conformance/device_code/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# FIXME: Currently missing dependencies to build GPU portion automatically.
add_offload_test_device_code(sin.c sin)
add_offload_test_device_code(LLVMLibm.c llvm-libm -stdlib -fno-builtin)

set(OFFLOAD_TEST_DEVICE_CODE_PATH ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)
add_custom_target(conformance_device_binaries DEPENDS llvm-libm.bin)
set(OFFLOAD_CONFORMANCE_DEVICE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)
37 changes: 37 additions & 0 deletions offload/unittests/Conformance/device_code/LLVMLibm.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the implementation of the device kernels that wrap the
/// math functions from the llvm-libm provider.
///
//===----------------------------------------------------------------------===//

#include <gpuintrin.h>
#include <math.h>
#include <stddef.h>
#include <stdint.h>

typedef _Float16 float16;

/// Device kernel that applies hypotf16 element-wise.
///
/// Each thread derives one linear index from its x-dimension block and thread
/// ids and, if that index is below \p NumElements, stores
/// hypotf16(X[Index], Y[Index]) into Out[Index]. Threads whose index is not
/// below \p NumElements do nothing.
///
/// \param X           First input array (read only).
/// \param Y           Second input array (only read by this kernel).
/// \param Out         Output array, written at the thread's index.
/// \param NumElements Number of elements to process.
__gpu_kernel void hypotf16Kernel(const float16 *X, float16 *Y, float16 *Out,
                                 size_t NumElements) {
  // Widen to size_t before multiplying: the intrinsics yield 32-bit values, so
  // a 32-bit product could wrap around before the comparison with the size_t
  // element count below.
  size_t Index = (size_t)__gpu_num_threads_x() * (size_t)__gpu_block_id_x() +
                 (size_t)__gpu_thread_id_x();

  if (Index < NumElements)
    Out[Index] = hypotf16(X[Index], Y[Index]);
}

/// Device kernel that applies logf element-wise.
///
/// Each thread derives one linear index from its x-dimension block and thread
/// ids and, if that index is below \p NumElements, stores logf(X[Index]) into
/// Out[Index]. Threads whose index is not below \p NumElements do nothing.
///
/// \param X           Input array (read only).
/// \param Out         Output array, written at the thread's index.
/// \param NumElements Number of elements to process.
__gpu_kernel void logfKernel(const float *X, float *Out, size_t NumElements) {
  // Widen to size_t before multiplying: the intrinsics yield 32-bit values, so
  // a 32-bit product could wrap around before the comparison with the size_t
  // element count below.
  size_t Index = (size_t)__gpu_num_threads_x() * (size_t)__gpu_block_id_x() +
                 (size_t)__gpu_thread_id_x();

  if (Index < NumElements)
    Out[Index] = logf(X[Index]);
}
4 changes: 0 additions & 4 deletions offload/unittests/Conformance/device_code/sin.c

This file was deleted.

101 changes: 101 additions & 0 deletions offload/unittests/Conformance/include/mathtest/CommandLine.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the definition of custom command-line argument parsers
/// using llvm::cl.
///
//===----------------------------------------------------------------------===//

#ifndef MATHTEST_COMMANDLINE_HPP
#define MATHTEST_COMMANDLINE_HPP

#include "mathtest/TestConfig.hpp"

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/CommandLine.h"

#include <string>

namespace llvm {
namespace cl {

/// Value type produced by the custom llvm::cl parser for the test-configs
/// option.
struct TestConfigsArg {
// How the configurations were selected. The parser sets All for the keyword
// "all" and Explicit for a list of pairs; Default is the initial value.
// Note that the data member deliberately shares its name with the enum type.
enum class Mode { Default, All, Explicit } Mode = Mode::Default;
// Configurations listed on the command line; filled by the parser when Mode
// is Explicit.
llvm::SmallVector<mathtest::TestConfig, 4> Explicit;
};

/// Specialization of llvm::cl::parser that converts the textual value of an
/// option into a TestConfigsArg.
///
/// Accepted values are the keyword "all" (matched case-insensitively) or a
/// comma-separated list of "provider:platform" pairs. Every pair must be one of
/// the configurations returned by mathtest::getAllTestConfigs().
template <> class parser<TestConfigsArg> : public basic_parser<TestConfigsArg> {
public:
  parser(Option &O) : basic_parser<TestConfigsArg>(O) {}

  /// Returns true if \p Config is contained in the list returned by
  /// mathtest::getAllTestConfigs().
  static bool isAllowed(const mathtest::TestConfig &Config) {
    // The list is fetched once and reused by every later call.
    static const llvm::SmallVector<mathtest::TestConfig, 4> &AllTestConfigs =
        mathtest::getAllTestConfigs();

    return llvm::is_contained(AllTestConfigs, Config);
  }

  /// Parses \p ArgValue into \p Val.
  ///
  /// \returns false on success; on failure, the result of O.error(...) after
  /// reporting a diagnostic through \p O.
  bool parse(Option &O, StringRef ArgName, StringRef ArgValue,
             TestConfigsArg &Val) {
    ArgValue = ArgValue.trim();
    if (ArgValue.empty())
      return O.error(
          "Expected '" + getValueName() +
          "', but got an empty string. Omit the flag to use defaults");

    if (ArgValue.equals_insensitive("all")) {
      Val.Mode = TestConfigsArg::Mode::All;
      return false;
    }

    llvm::SmallVector<StringRef, 8> Pairs;
    ArgValue.split(Pairs, ',', /*MaxSplit=*/-1, /*KeepEmpty=*/false);

    // Because empty pieces are dropped, a value consisting solely of
    // separators (e.g. ",") produces no pairs at all. Reject it instead of
    // silently accepting an explicit selection that contains nothing.
    if (Pairs.empty())
      return O.error("Expected '" + getValueName() + "', but got '" +
                     ArgValue + "'");

    Val.Mode = TestConfigsArg::Mode::Explicit;
    Val.Explicit.clear();

    for (StringRef Pair : Pairs) {
      llvm::SmallVector<StringRef, 2> Parts;
      Pair.split(Parts, ':');

      // Exactly one ':' is required; "a" and "a:b:c" are both rejected here.
      if (Parts.size() != 2)
        return O.error("Expected '<provider>:<platform>', got '" + Pair + "'");

      StringRef Provider = Parts[0].trim();
      StringRef Platform = Parts[1].trim();

      if (Provider.empty() || Platform.empty())
        return O.error("Provider and platform must not be empty in '" + Pair +
                       "'");

      mathtest::TestConfig Config = {Provider.str(), Platform.str()};
      if (!isAllowed(Config))
        return O.error("Invalid pair '" + Pair + "'");

      Val.Explicit.push_back(Config);
    }

    return false;
  }

  /// Returns the placeholder that describes the accepted value syntax; it is
  /// also embedded in the diagnostics emitted by parse().
  StringRef getValueName() const override {
    return "all|provider:platform[,provider:platform...]";
  }

  /// Prints the option through printOptionNoValue; \p V and \p Default are not
  /// inspected.
  void printOptionDiff(const Option &O, const TestConfigsArg &V, OptVal Default,
                       size_t GlobalWidth) const {
    printOptionNoValue(O, GlobalWidth);
  }
};
} // namespace cl
} // namespace llvm

#endif // MATHTEST_COMMANDLINE_HPP
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the declaration of the command-line options and the main
/// interface for selecting test configurations.
///
//===----------------------------------------------------------------------===//

#ifndef MATHTEST_COMMANDLINEEXTRAS_HPP
#define MATHTEST_COMMANDLINEEXTRAS_HPP

#include "mathtest/CommandLine.hpp"
#include "mathtest/TestConfig.hpp"

#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/CommandLine.h"

namespace mathtest {
namespace cl {

/// Boolean command-line option, defined in another translation unit.
/// NOTE(review): presumably enables verbose output, judging by the name —
/// confirm against the definition.
extern llvm::cl::opt<bool> IsVerbose;

namespace detail {

/// Command-line option whose value has type llvm::cl::TestConfigsArg; defined
/// in another translation unit.
extern llvm::cl::opt<llvm::cl::TestConfigsArg> TestConfigsOpt;
} // namespace detail

/// Returns a reference to the list of test configurations to use.
/// NOTE(review): presumably derived from detail::TestConfigsOpt — confirm
/// against the definition.
const llvm::SmallVector<TestConfig, 4> &getTestConfigs();
} // namespace cl
} // namespace mathtest

#endif // MATHTEST_COMMANDLINEEXTRAS_HPP
137 changes: 137 additions & 0 deletions offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the definition of the DeviceContext class, which serves
/// as the high-level interface to a particular device (GPU).
///
/// This class provides methods for allocating buffers, loading binaries, and
/// getting and launching kernels on the device.
///
//===----------------------------------------------------------------------===//

#ifndef MATHTEST_DEVICECONTEXT_HPP
#define MATHTEST_DEVICECONTEXT_HPP

#include "mathtest/DeviceResources.hpp"
#include "mathtest/ErrorHandling.hpp"
#include "mathtest/Support.hpp"

#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <tuple>
#include <type_traits>
#include <utility>

namespace mathtest {

/// Returns a reference to a set of strings; defined in another translation
/// unit. NOTE(review): presumably the names of the known platforms — confirm
/// against the definition.
const llvm::SetVector<llvm::StringRef> &getPlatforms();

namespace detail {

/// Allocation helper used by DeviceContext::createManagedBuffer, which passes
/// the device handle, a size in bytes (element count times element size), and
/// the address of a pointer that it reads back after the call.
/// NOTE(review): failure behaviour is not visible from this declaration —
/// confirm against the definition.
void allocManagedMemory(ol_device_handle_t DeviceHandle, std::size_t Size,
void **AllocationOut) noexcept;
} // namespace detail

class DeviceContext {
// For simplicity, the current design of this class doesn't have support for
// asynchronous operations and all types of memory allocation.
//
// Other use cases could benefit from operations like enqueued kernel launch
// and enqueued memcpy, as well as device and host memory allocation.

public:
explicit DeviceContext(std::size_t GlobalDeviceId = 0);

explicit DeviceContext(llvm::StringRef Platform, std::size_t DeviceId = 0);

template <typename T>
ManagedBuffer<T> createManagedBuffer(std::size_t Size) const noexcept {
void *UntypedAddress = nullptr;

detail::allocManagedMemory(DeviceHandle, Size * sizeof(T), &UntypedAddress);
T *TypedAddress = static_cast<T *>(UntypedAddress);

return ManagedBuffer<T>(TypedAddress, Size);
}

[[nodiscard]] llvm::Expected<std::shared_ptr<DeviceImage>>
loadBinary(llvm::StringRef Directory, llvm::StringRef BinaryName) const;

template <typename KernelSignature>
[[nodiscard]] llvm::Expected<DeviceKernel<KernelSignature>>
getKernel(const std::shared_ptr<DeviceImage> &Image,
llvm::StringRef KernelName) const {
assert(Image && "Image provided to getKernel is null");

if (Image->DeviceHandle != DeviceHandle)
return llvm::createStringError(
"Image provided to getKernel was created for a different device");

auto ExpectedHandle = getKernelHandle(Image->Handle, KernelName);

if (!ExpectedHandle)
return ExpectedHandle.takeError();

return DeviceKernel<KernelSignature>(Image, *ExpectedHandle);
}

template <typename KernelSignature, typename... ArgTypes>
void launchKernel(DeviceKernel<KernelSignature> Kernel, uint32_t NumGroups,
uint32_t GroupSize, ArgTypes &&...Args) const noexcept {
using ExpectedTypes =
typename FunctionTypeTraits<KernelSignature>::ArgTypesTuple;
using ProvidedTypes = std::tuple<std::decay_t<ArgTypes>...>;

static_assert(std::is_same_v<ExpectedTypes, ProvidedTypes>,
"Argument types provided to launchKernel do not match the "
"kernel's signature");

if (Kernel.Image->DeviceHandle != DeviceHandle)
FATAL_ERROR("Kernel provided to launchKernel was created for a different "
"device");

if constexpr (sizeof...(Args) == 0) {
launchKernelImpl(Kernel.Handle, NumGroups, GroupSize, nullptr, 0);
} else {
auto KernelArgs = makeKernelArgsPack(std::forward<ArgTypes>(Args)...);

static_assert(
(std::is_trivially_copyable_v<std::decay_t<ArgTypes>> && ...),
"Argument types provided to launchKernel must be trivially copyable");

launchKernelImpl(Kernel.Handle, NumGroups, GroupSize, &KernelArgs,
sizeof(KernelArgs));
}
}

[[nodiscard]] llvm::StringRef getName() const noexcept;

[[nodiscard]] llvm::StringRef getPlatform() const noexcept;

private:
[[nodiscard]] llvm::Expected<ol_symbol_handle_t>
getKernelHandle(ol_program_handle_t ProgramHandle,
llvm::StringRef KernelName) const noexcept;

void launchKernelImpl(ol_symbol_handle_t KernelHandle, uint32_t NumGroups,
uint32_t GroupSize, const void *KernelArgs,
std::size_t KernelArgsSize) const noexcept;

std::size_t GlobalDeviceId;
ol_device_handle_t DeviceHandle;
};
} // namespace mathtest

#endif // MATHTEST_DEVICECONTEXT_HPP
Loading
Loading