Skip to content

Commit ba29f69

Browse files
committed
Add test
1 parent 883393c commit ba29f69

File tree

3 files changed

+122
-1
lines changed

3 files changed

+122
-1
lines changed

offload/unittests/OffloadAPI/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ add_offload_unittest("offload.unittests"
2020
${CMAKE_CURRENT_SOURCE_DIR}/program/olDestroyProgram.cpp
2121
${CMAKE_CURRENT_SOURCE_DIR}/kernel/olGetKernel.cpp
2222
${CMAKE_CURRENT_SOURCE_DIR}/kernel/olLaunchKernel.cpp
23+
${CMAKE_CURRENT_SOURCE_DIR}/kernel/olLaunchKernelSuggestedGroupSize.cpp
2324
${CMAKE_CURRENT_SOURCE_DIR}/event/olDestroyEvent.cpp
2425
${CMAKE_CURRENT_SOURCE_DIR}/event/olWaitEvent.cpp
2526
)

offload/unittests/OffloadAPI/device_code/foo.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,11 @@
22
#include <stdint.h>
33

44
// Test kernel: every work-item computes its linearized global index across
// the (up to three-dimensional) launch grid and stores that index at the same
// position in `out`.
//
// `out` must have room for one uint32_t per launched work-item
// (num_blocks * num_threads in each dimension); the kernel does no bounds
// checking of its own.
//
// All index arithmetic is done in uint32_t, matching both the element type of
// `out` and the unsigned values the __gpu_* intrinsics work with, so there is
// no signed/unsigned conversion and no signed overflow.
__gpu_kernel void foo(uint32_t *out) {
  // Global position of this work-item and total extent, per dimension.
  uint32_t x = __gpu_block_id(0) * __gpu_num_threads(0) + __gpu_thread_id(0);
  uint32_t xw = __gpu_num_blocks(0) * __gpu_num_threads(0);
  uint32_t y = __gpu_block_id(1) * __gpu_num_threads(1) + __gpu_thread_id(1);
  uint32_t yw = __gpu_num_blocks(1) * __gpu_num_threads(1);
  uint32_t z = __gpu_block_id(2) * __gpu_num_threads(2) + __gpu_thread_id(2);
  // Row-major linearization: x varies fastest, then y, then z.
  uint32_t offset = (z * yw * xw) + (y * xw) + x;
  out[offset] = offset;
}
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
//===------- Offload API tests - olLaunchKernelSuggestedGroupSize ---------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "../common/Fixtures.hpp"
10+
#include <OffloadAPI.h>
11+
#include <gtest/gtest.h>
12+
13+
// Launch shapes exercised by the tests in this file. Each row is
// {Dimensions, NumItemsX, NumItemsY, NumItemsZ}.
static constexpr uint32_t COMBOS[6][4] = {
    // One-dimensional launches.
    {1, 64, 1, 1},
    {1, 63, 1, 1},
    // Two-dimensional launches.
    {2, 64, 64, 1},
    {2, 40, 40, 1},
    // Three-dimensional launches.
    {3, 64, 64, 64},
    {3, 128, 20, 12},
};
17+
18+
struct olLaunchKernelSuggestedGroupSizeTest : OffloadQueueTest {
19+
void SetUp() override {
20+
RETURN_ON_FATAL_FAILURE(OffloadQueueTest::SetUp());
21+
ASSERT_TRUE(TestEnvironment::loadDeviceBinary("foo", Device, DeviceBin));
22+
ASSERT_GE(DeviceBin->getBufferSize(), 0lu);
23+
ASSERT_SUCCESS(olCreateProgram(Device, DeviceBin->getBufferStart(),
24+
DeviceBin->getBufferSize(), &Program));
25+
ASSERT_SUCCESS(olGetKernel(Program, "foo", &Kernel));
26+
}
27+
28+
void TearDown() override {
29+
if (Program) {
30+
olDestroyProgram(Program);
31+
}
32+
RETURN_ON_FATAL_FAILURE(OffloadQueueTest::TearDown());
33+
}
34+
35+
std::unique_ptr<llvm::MemoryBuffer> DeviceBin;
36+
ol_program_handle_t Program = nullptr;
37+
ol_kernel_handle_t Kernel = nullptr;
38+
};
39+
40+
// TEST_P cases only run once their fixture has been instantiated; this project
// macro does that for the fixture above (presumably once per available
// device -- confirm against its definition in common/Fixtures.hpp).
OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olLaunchKernelSuggestedGroupSizeTest);
41+
42+
// Launches the "foo" kernel on the fixture's queue once per shape in COMBOS
// and checks that every element of the output buffer holds its own index.
TEST_P(olLaunchKernelSuggestedGroupSizeTest, Success) {
  for (const auto &C : COMBOS) {
    // Tag every failure reported below with the launch shape being tested.
    std::string scope{};
    llvm::raw_string_ostream os{scope};
    os << "{ " << C[0] << ", " << C[1] << ", " << C[2] << ", " << C[3] << "}";
    os.flush();
    SCOPED_TRACE(scope);

    // One output element per requested work-item.
    const uint32_t NumItems = C[1] * C[2] * C[3];

    ol_kernel_launch_size_suggested_args_t LaunchArgs{};
    LaunchArgs.Dimensions = C[0];
    LaunchArgs.NumItemsX = C[1];
    LaunchArgs.NumItemsY = C[2];
    LaunchArgs.NumItemsZ = C[3];

    void *Mem = nullptr;
    ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                              NumItems * sizeof(uint32_t), &Mem));
    // Kernel argument block: a single pointer to the output buffer.
    struct {
      void *Mem;
    } Args{Mem};

    ASSERT_SUCCESS(olLaunchKernelSuggestedGroupSize(
        Queue, Device, Kernel, &Args, sizeof(Args), &LaunchArgs, nullptr));

    ASSERT_SUCCESS(olWaitQueue(Queue));

    // The kernel writes uint32_t values, so read them back with that type.
    const uint32_t *Data = static_cast<const uint32_t *>(Mem);
    for (uint32_t I = 0; I < NumItems; I++) {
      if (Data[I] != I) {
        // Report non-fatally and stop at the first mismatch so the buffer is
        // still released below instead of leaking on an early return.
        ADD_FAILURE() << "Data[" << I << "] is " << Data[I] << ", expected "
                      << I;
        break;
      }
    }

    ASSERT_SUCCESS(olMemFree(Mem));
  }
}
78+
79+
// Same check as the queued variant, but the kernel is launched with a null
// queue handle (presumably requesting a synchronous launch, as the test name
// suggests -- confirm against the API documentation).
TEST_P(olLaunchKernelSuggestedGroupSizeTest, SuccessSynchronous) {
  for (const auto &C : COMBOS) {
    // Tag every failure reported below with the launch shape being tested.
    std::string scope{};
    llvm::raw_string_ostream os{scope};
    os << "{ " << C[0] << ", " << C[1] << ", " << C[2] << ", " << C[3] << "}";
    os.flush();
    SCOPED_TRACE(scope);

    // One output element per requested work-item.
    const uint32_t NumItems = C[1] * C[2] * C[3];

    ol_kernel_launch_size_suggested_args_t LaunchArgs{};
    LaunchArgs.Dimensions = C[0];
    LaunchArgs.NumItemsX = C[1];
    LaunchArgs.NumItemsY = C[2];
    LaunchArgs.NumItemsZ = C[3];

    void *Mem = nullptr;
    ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                              NumItems * sizeof(uint32_t), &Mem));
    // Kernel argument block: a single pointer to the output buffer.
    struct {
      void *Mem;
    } Args{Mem};

    ASSERT_SUCCESS(olLaunchKernelSuggestedGroupSize(
        nullptr, Device, Kernel, &Args, sizeof(Args), &LaunchArgs, nullptr));

    // NOTE(review): nothing was enqueued on Queue by this test; the wait is
    // kept as in the original -- confirm whether it is needed.
    ASSERT_SUCCESS(olWaitQueue(Queue));

    // The kernel writes uint32_t values, so read them back with that type.
    const uint32_t *Data = static_cast<const uint32_t *>(Mem);
    for (uint32_t I = 0; I < NumItems; I++) {
      if (Data[I] != I) {
        // Report non-fatally and stop at the first mismatch so the buffer is
        // still released below instead of leaking on an early return.
        ADD_FAILURE() << "Data[" << I << "] is " << Data[I] << ", expected "
                      << I;
        break;
      }
    }

    ASSERT_SUCCESS(olMemFree(Mem));
  }
}

0 commit comments

Comments
 (0)