forked from alpaka-group/alpaka3
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathblock.cpp
More file actions
82 lines (68 loc) · 2.52 KB
/
block.cpp
File metadata and controls
82 lines (68 loc) · 2.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
/* Copyright 2024 René Widera
* SPDX-License-Identifier: MPL-2.0
*/
#include <alpaka/alpaka.hpp>
#include <catch2/catch_template_test_macros.hpp>
#include <catch2/catch_test_macros.hpp>
#include <chrono>
#include <functional>
#include <iostream>
#include <thread>
using namespace alpaka;
using namespace alpaka::onHost;
using TestApis = std::decay_t<decltype(allBackends(enabledApis, exec::enabledExecutors))>;
struct BlockIotaKernel
{
ALPAKA_FN_ACC void operator()(auto const& acc, auto out, auto numBlocks) const
{
auto const numDataElemInBlock = acc[frame::extent];
for(auto blockIdx : onAcc::makeIdxMap(
acc,
onAcc::worker::blocksInGrid,
IdxRange{ALPAKA_TYPEOF(numBlocks)::fill(0), numBlocks * numDataElemInBlock, numDataElemInBlock},
onAcc::traverse::tiled,
onAcc::layout::contiguous))
{
auto blockOffset = blockIdx;
for(auto inBlockOffset : onAcc::makeIdxMap(
acc,
onAcc::worker::threadsInBlock,
onAcc::range::frameExtent,
onAcc::traverse::tiled))
{
out[blockOffset + inBlockOffset] = (blockOffset + inBlockOffset).x();
}
}
}
};
TEMPLATE_LIST_TEST_CASE("block iota", "", TestApis)
{
auto cfg = TestType::makeDict();
auto deviceSpec = cfg[object::deviceSpec];
auto exec = cfg[object::exec];
auto devSelector = onHost::makeDeviceSelector(deviceSpec);
if(!devSelector.isAvailable())
{
std::cout << "No device available for " << deviceSpec.getName() << std::endl;
return;
}
std::cout << deviceSpec.getApi().getName() << std::endl;
onHost::Device device = devSelector.makeDevice(0);
std::cout << device.getDeviceProperties() << std::endl;
Queue queue = device.makeQueue();
constexpr Vec numBlocks = Vec{9u};
constexpr Vec blockExtent = Vec{4u};
constexpr Vec dataExtent = numBlocks * blockExtent;
std::cout << "block iota exec=" << onHost::demangledName(exec) << std::endl;
auto dBuff = onHost::alloc<uint32_t>(device, dataExtent);
auto hBuff = onHost::allocHostLike(dBuff);
onHost::wait(queue);
queue.enqueue(exec, FrameSpec{numBlocks / 2u, blockExtent}, KernelBundle{BlockIotaKernel{}, dBuff, numBlocks});
onHost::memcpy(queue, hBuff, dBuff);
onHost::wait(queue);
auto* ptr = onHost::data(hBuff);
for(uint32_t i = 0u; i < dataExtent; ++i)
{
CHECK(i == ptr[i]);
}
}