forked from alpaka-group/alpaka3
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexecuteOnComputeDevice.hpp
More file actions
75 lines (64 loc) · 3.52 KB
/
executeOnComputeDevice.hpp
File metadata and controls
75 lines (64 loc) · 3.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
/* Copyright 2024 Benjamin Worpitz, Andrea Bocci, Bernhard Manfred Gruber, Jan Stephan, Aurora Perego
* SPDX-License-Identifier: MPL-2.0
*/
#pragma once
#include "alpaka/core/Sycl.hpp"
#include <alpaka/alpaka.hpp>
#include <catch2/catch_message.hpp>
#include <catch2/catch_test_macros.hpp>
#include <cstdint>
#include <cstdio>
#include <iostream>
#include <type_traits>
#include <utility>
/** Record a failed check in a flag instead of aborting.
 *
 * When `expression` evaluates to false, the stringified expression is printed with printf and `false` is
 * assigned to `success`. When the expression holds, `success` is not touched, so several consecutive checks
 * can accumulate into a single flag.
 *
 * @param success modifiable boolean lvalue expression; it is parenthesised in the expansion so that compound
 *                expressions (e.g. a conditional selecting between two flags) are assigned as a whole
 * @param expression condition that is expected to be true
 */
#define ALPAKA_CHECK(success, expression) \
    do \
    { \
        if(!(expression)) \
        { \
            printf("ALPAKA_CHECK failed because '!(%s)'\n", #expression); \
            (success) = false; \
        } \
    } while(0)
namespace alpaka::test
{
    /** Enqueue a kernel once on device 0 of the configured backend and return the result flag it leaves behind.
     *
     * A one-element bool buffer is allocated on the host and mirrored on the device. Every byte of the device
     * buffer is set to 1 (i.e. `true`) before the launch; after the launch the buffer is copied back and its
     * single element is returned. The kernel is bundled with the device result buffer as its first bound
     * argument, followed by `args`.
     *
     * @tparam T_DataType data type under test. Only inspected when ALPAKA_LANG_ONEAPI is set: for `double` on
     *         the OneApi backend the device is queried for double precision support first.
     * @param cfg configuration object; it is indexed with `object::deviceSpec` and `object::exec`
     * @param kernelFnObj kernel function object to enqueue
     * @param args additional arguments forwarded into the kernel bundle after the result buffer
     * @return `false` if no device is available for the device specification;
     *         `true` if the double precision test is skipped because the SYCL device reports no fp64 support;
     *         otherwise the value of the result buffer after the kernel finished.
     *
     * NOTE(review): Catch2 `INFO` messages are scoped to this function. They are presumably only attached to
     * failures raised while it is still executing, not to an assertion the caller performs on the returned
     * value - confirm whether `UNSCOPED_INFO` is wanted instead.
     */
    template<typename T_DataType = alpaka::NotRequired>
    bool executeOnComputeDevice(auto cfg, auto kernelFnObj, auto&&... args)
    {
        auto deviceSpec = cfg[object::deviceSpec];
        auto exec = cfg[object::exec];

        auto devSelector = onHost::makeDeviceSelector(deviceSpec);
        if(!devSelector.isAvailable())
        {
            std::cout << "No device available for " << deviceSpec.getName() << std::endl;
            return false;
        }

        INFO("testing" << " functor:" << alpaka::onHost::demangledName(kernelFnObj));
        INFO("api:" << deviceSpec.getApi().getName());

        // always use the first device offered by the selector
        onHost::Device device = devSelector.makeDevice(0);

#if ALPAKA_LANG_ONEAPI
        // Decay the type so the comparison below also holds should getApi() hand back a reference or a
        // cv-qualified object; for a by-value return this is identical to the plain decltype.
        using ApiType = std::decay_t<decltype(deviceSpec.getApi())>;

        // support for double precision is not guaranteed for sycl devices such as Intel GPUs
        if constexpr(std::is_same_v<T_DataType, double> && std::is_same_v<ApiType, api::OneApi>)
        {
            // an empty double_fp_config list is treated as "no double precision support"
            if(device.getNativeHandle().first.template get_info<sycl::info::device::double_fp_config>().size() == 0)
            {
                WARN(
                    onHost::getName(device)
                    << " does not support double precision.\n Skip benchmark.\n"
                       "For Intel Arc GPUs, use the environment variables `IGC_EnableDPEmulation=1 "
                       "OverrideDefaultFP64Settings=1` to emulate double precision support.\n");
                // skipping is reported as success so that the calling test does not fail
                return true;
            }
        }
#endif

        INFO("exec:" << onHost::demangledName(exec));
        INFO("device:" << device.getName());

        onHost::Queue queue = device.makeQueue();

        // one bool on the host and a matching buffer on the device to carry the kernel's verdict
        auto hBufferResults = onHost::allocHost<bool>(1u);
        auto dBufferResults = onHost::allocLike(device, hBufferResults);
        // start from `true`; the kernel is expected to clear the flag on failure (see ALPAKA_CHECK)
        onHost::memset(queue, dBufferResults, static_cast<std::uint8_t>(true));

        // The frame specification is fixed to {1u, 1u}; no sizing based on a problem extent happens here.
        onHost::concepts::FrameSpec auto frameSpec = onHost::FrameSpec{1u, 1u};

        auto kernel = KernelBundle{kernelFnObj, dBufferResults, ALPAKA_FORWARD(args)...};
        queue.enqueue(exec, frameSpec, kernel);

        // copy the flag back and wait on the queue before reading it on the host
        onHost::memcpy(queue, hBufferResults, dBufferResults);
        alpaka::onHost::wait(queue);

        return hBufferResults[0];
    }
} // namespace alpaka::test