forked from alpaka-group/alpaka3
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathkernelMD.cpp
More file actions
115 lines (93 loc) · 3.84 KB
/
kernelMD.cpp
File metadata and controls
115 lines (93 loc) · 3.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
/* Copyright 2024 René Widera
* SPDX-License-Identifier: MPL-2.0
*/
/** @file
*
* This test validate if a M-dimension kernel can be called with a FrameSpec and ThreadSpec.
* Additionally the M-dimensional memcpy and memset is tested too.
*/
#include <alpaka/alpaka.hpp>
#include <catch2/catch_template_test_macros.hpp>
#include <catch2/catch_test_macros.hpp>
#include <iostream>
using namespace alpaka;
using TestApis = std::decay_t<decltype(onHost::allBackends(onHost::enabledApis, exec::enabledExecutors))>;
struct LastSetDataBlockIdx
{
ALPAKA_FN_ACC void operator()(
auto const& acc,
alpaka::concepts::IMdSpan auto out,
alpaka::concepts::Vector auto extentMd) const
{
for(auto i : onAcc::makeIdxMap(acc, onAcc::worker::threadsInGrid, IdxRange(extentMd)))
{
out[i] = i;
}
}
};
template<alpaka::concepts::Vector T_Extents, alpaka::concepts::Vector T_FrameSize>
struct Case
{
Case(T_Extents extents, T_FrameSize frameSize) : m_extents(extents), m_frameSize(frameSize)
{
}
T_Extents m_extents;
T_FrameSize m_frameSize;
};
void validate(auto& queue, auto& device, auto exec, auto testCase)
{
Vec extentMd = testCase.m_extents;
std::cout << " exec=" << onHost::demangledName(exec) << " extents=" << testCase.m_extents
<< " frame size=" << testCase.m_frameSize << std::endl;
auto dBuff = onHost::alloc<Vec<uint32_t, extentMd.dim()>>(device, extentMd);
auto hBuff = onHost::allocHostLike(dBuff);
// fill with non-zero values
onHost::fill(queue, dBuff, extentMd);
onHost::wait(queue);
auto frameSize = testCase.m_frameSize;
queue.enqueue(
exec,
onHost::FrameSpec{divExZero(extentMd, frameSize), frameSize},
KernelBundle{LastSetDataBlockIdx{}, dBuff, extentMd});
onHost::memcpy(queue, hBuff, dBuff);
onHost::wait(queue);
// validate that each data entry has its corresponding MD-element index
meta::ndLoopIncIdx(extentMd, [&](auto idx) { CHECK(hBuff[idx] == idx); });
onHost::memset(queue, dBuff, 0u);
onHost::memcpy(queue, hBuff, dBuff);
onHost::wait(queue);
// validate that all data is zero
meta::ndLoopIncIdx(extentMd, [&](auto idx) { CHECK(hBuff[idx] == ALPAKA_TYPEOF(extentMd)::fill(0)); });
}
TEMPLATE_LIST_TEST_CASE("kernelCallMD", "", TestApis)
{
auto cfg = TestType::makeDict();
auto deviceSpec = cfg[object::deviceSpec];
auto exec = cfg[object::exec];
auto devSelector = onHost::makeDeviceSelector(deviceSpec);
if(!devSelector.isAvailable())
{
std::cout << "No device available for " << deviceSpec.getName() << std::endl;
return;
}
std::cout << deviceSpec.getApi().getName() << std::endl;
onHost::Device device = devSelector.makeDevice(0);
std::cout << device.getName() << std::endl;
onHost::Queue queue = device.makeQueue();
auto testCfg = std::make_tuple(
Case{Vec{3u}, Vec{2u}},
Case{Vec{4u, 7u}, Vec{2u, 4u}},
Case{Vec{4u, 8u, 13}, Vec{2u, 4u, 8u}},
Case{Vec{4u, 8u, 16, 31}, Vec{2u, 4u, 8u, 4u}},
Case{Vec{4u, 8u, 16, 8u, 3u}, Vec{2u, 4u, 8u, 4u, 2u}},
Case{Vec{4u, 8u, 16, 8u, 3u, 5u}, Vec{2u, 4u, 8u, 4u, 2u, 3u}},
Case{Vec{4u, 8u, 16, 8u, 3u, 5u, 2u}, Vec{2u, 4u, 8u, 4u, 2u, 3u, 2u}},
Case{Vec{3u}, CVec<uint32_t, 2u>{}},
Case{Vec{4u, 7u}, CVec<uint32_t, 2u, 4u>{}},
Case{Vec{4u, 8u, 13}, CVec<uint32_t, 2u, 4u, 8u>{}},
Case{Vec{4u, 8u, 16, 31}, CVec<uint32_t, 2u, 4u, 8u, 4u>{}},
Case{Vec{4u, 8u, 16, 8u, 3u}, CVec<uint32_t, 2u, 4u, 8u, 4u, 2u>{}},
Case{Vec{4u, 8u, 16, 8u, 3u, 5u}, CVec<uint32_t, 2u, 4u, 8u, 4u, 2u, 3u>{}},
Case{Vec{4u, 8u, 16, 8u, 3u, 5u, 2u}, CVec<uint32_t, 2u, 4u, 8u, 4u, 2u, 3u, 2u>{}});
std::apply([&](auto... testCase) { (validate(queue, device, exec, testCase), ...); }, testCfg);
}