Skip to content

Commit 6b0d8a6

Browse files
add alpaka function interface (#452)
* vendor function interface: provide an interface to map function calls to vendor functions. This can be used to call functionality that is not available in alpaka or in cases where the vendor implementation provides better performance. The vendor interface provides fallback possibilities to alpaka implementations to ensure the portability of the user application. - implement vendor interface - provide an example on how to use the interface * review comments - extend the example description/motivation - remove unused code (random number generator) * refactor initial alpaka vendor interface - rename `alpaka::vendor` to `alpaka::fn` - update documentation - add generic fallback policy `toGeneric` * apply review comments - simplify `alpaka::Fn::operator()` concept usage - simplify vendor example CUDA dispatch
1 parent 1c5b0db commit 6b0d8a6

File tree

10 files changed

+726
-2
lines changed

10 files changed

+726
-2
lines changed

example/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,4 @@ add_subdirectory("vectorAdd/")
2424
add_subdirectory("tutorial/")
2525
add_subdirectory("alpaka-ls/")
2626
add_subdirectory("randomInit/")
27+
add_subdirectory("vendorApi/")

example/vendorApi/CMakeLists.txt

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#
2+
# Copyright 2026 René Widera
3+
# SPDX-License-Identifier: ISC
4+
#
5+
6+
################################################################################
# Minimum CMake version required to configure this example.

cmake_minimum_required(VERSION 3.25)

set_property(GLOBAL PROPERTY USE_FOLDERS ON)

################################################################################
# Project setup.

set(_TARGET_NAME vendorApi)

project(${_TARGET_NAME} LANGUAGES CXX)

#-------------------------------------------------------------------------------
# Locate alpaka, unless the alpaka::alpaka target already exists.

if(NOT TARGET alpaka::alpaka)
    option(alpaka_USE_SOURCE_TREE "Use alpaka's source tree instead of an alpaka installation" OFF)

    if(NOT alpaka_USE_SOURCE_TREE)
        find_package(alpaka REQUIRED)
    else()
        # Keep the nested alpaka build from building the examples again.
        set(alpaka_BUILD_EXAMPLES OFF)
        add_subdirectory("${CMAKE_CURRENT_LIST_DIR}/../.." "${CMAKE_BINARY_DIR}/alpaka")
    endif()
endif()

#-------------------------------------------------------------------------------
# Optional CUDA toolkit lookup, only attempted when alpaka_DEP_CUDA is set.
# On success _alpaka_HAS_CUDA_TOOLKIT is set and used below to link the CUDA runtime.

if(alpaka_DEP_CUDA)
    find_package(CUDAToolkit)
    if(CUDAToolkit_FOUND)
        message(STATUS "Found optional CUDAToolkit, version " ${CUDAToolkit_VERSION})

        set(_alpaka_HAS_CUDA_TOOLKIT 1)
    else()
        message(STATUS "Optional CUDAToolkit not found, alpaka vendor api will not use thrust")
    endif()
endif()

#-------------------------------------------------------------------------------
# Executable target of the example.

add_executable(${_TARGET_NAME} src/main.cpp)
target_include_directories(${_TARGET_NAME} PRIVATE "src")
target_link_libraries(${_TARGET_NAME} PUBLIC alpaka::alpaka)
alpaka_finalize(${_TARGET_NAME})

set_target_properties(${_TARGET_NAME} PROPERTIES FOLDER example)

# Link the static CUDA runtime only if the toolkit lookup above succeeded.
if(_alpaka_HAS_CUDA_TOOLKIT)
    target_link_libraries(${_TARGET_NAME} PRIVATE CUDA::cudart_static)
endif()

# Register the example executable as a test with the same name.
add_test(NAME ${_TARGET_NAME} COMMAND ${_TARGET_NAME})

example/vendorApi/src/alpakaFn.hpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/* Copyright 2026 René Widera
2+
* SPDX-License-Identifier: ISC
3+
*/
4+
5+
#pragma once
6+
7+
#include "fn.hpp"
8+
9+
#include <alpaka/alpaka.hpp>
10+
11+
namespace vendorExample
12+
{
13+
/** Generic fallback to alpaka implementation.
14+
*
15+
* If Transform is defined with alpaka::fn::Fallback::toAlpaka the generic alpaka implementation is called in
16+
* case no other overload is fitting.
17+
*
18+
* @{
19+
*/
20+
template<alpaka::concepts::DeviceKind T_DeviceKind>
21+
constexpr void fnRegister(Transform::Spec<alpaka::fn::api::Alpaka, T_DeviceKind>)
22+
{
23+
}
24+
25+
template<alpaka::concepts::DeviceKind T_DeviceKind>
26+
constexpr void fnDispatch(
27+
Transform::Spec<alpaka::fn::api::Alpaka, T_DeviceKind>,
28+
auto&& queue,
29+
alpaka::concepts::IMdSpan auto&& output,
30+
auto&& binaryOp,
31+
alpaka::concepts::IMdSpan auto&& input0,
32+
alpaka::concepts::IMdSpan auto&& input1)
33+
{
34+
std::cout << "call alpaka::transform" << std::endl;
35+
alpaka::onHost::transform(
36+
ALPAKA_FORWARD(queue),
37+
ALPAKA_FORWARD(output),
38+
ALPAKA_FORWARD(binaryOp),
39+
ALPAKA_FORWARD(input0),
40+
ALPAKA_FORWARD(input1));
41+
}
42+
43+
/** @} */
44+
} // namespace vendorExample

example/vendorApi/src/cudaFn.hpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/* Copyright 2026 René Widera
2+
* SPDX-License-Identifier: ISC
3+
*/
4+
5+
#pragma once
6+
7+
#include "fn.hpp"
8+
9+
#include <alpaka/alpaka.hpp>
10+
11+
#if __has_include(<thrust/transform.h>)
12+
# include <thrust/device_vector.h>
13+
# include <thrust/transform.h>
14+
15+
namespace vendorExample
16+
{
17+
/** Cuda function overload for Transform.
18+
*
19+
* @{
20+
*/
21+
constexpr void fnRegister(Transform::Spec<alpaka::api::Cuda, alpaka::deviceKind::NvidiaGpu>)
22+
{
23+
}
24+
25+
constexpr void fnDispatch(
26+
Transform::Spec<alpaka::api::Cuda, alpaka::deviceKind::NvidiaGpu>,
27+
auto&& queue,
28+
alpaka::concepts::IMdSpan auto&& output,
29+
auto&& binaryOp,
30+
alpaka::concepts::IMdSpan auto&& input0,
31+
alpaka::concepts::IMdSpan auto&& input1)
32+
{
33+
std::cout << "call thrust::transform" << std::endl;
34+
thrust::transform(
35+
thrust::cuda::par.on(queue.getNativeHandle()),
36+
input0.data(),
37+
input0.data() + input0.getExtents().x(),
38+
input1.data(),
39+
output.data(),
40+
binaryOp);
41+
}
42+
43+
/** @} */
44+
} // namespace vendorExample
45+
#endif

example/vendorApi/src/fn.hpp

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
/* Copyright 2026 René Widera
2+
* SPDX-License-Identifier: ISC
3+
*/
4+
5+
#pragma once
6+
7+
#include <alpaka/alpaka.hpp>
8+
9+
#include <algorithm>
10+
11+
namespace vendorExample
12+
{
13+
/** Function class definition.
14+
*
15+
* The class is used to register, dispatch and call vendor function overloads.
16+
* alpaka::fn::Fallback and alpaka::fn::Registration are optional arguments.
17+
*/
18+
ALPAKA_FN_SYMBOL(Transform, alpaka::fn::Fallback::toAlpaka, alpaka::fn::Registration::enforced);
19+
20+
/** Notify alpaka that the function symbol Transform for the device specification, api Host and the device kind
21+
* Cpu, is available.
22+
*
23+
* This allows to call isRegistered() with a device or queue specification of api Host and device kind Cpu and get
24+
* true as the result. It also allows to skip a code path if there is no specialization available.
25+
*
26+
* @code
27+
* // only call the vendor function if it is registered for the given queue device specification
28+
* if constexpr (vendorExample::Transform::isRegistered(queue))
29+
* {
30+
* // call vendor function overload depending on the queue's device specification
31+
* vendorExample::Transform::call(queue, output, binaryOp, input0, input1);
32+
* }
33+
* @endcode
34+
*/
35+
constexpr void fnRegister(Transform::Spec<alpaka::api::Host, alpaka::deviceKind::Cpu>)
36+
{
37+
}
38+
39+
/** Overload Transform for the api HOST and the device kind CPU.
40+
*
41+
* This function will be called if Transform::call() is called with a queue or device specification of api Host and
42+
* device kind Cpu.
43+
*/
44+
constexpr void fnDispatch(
45+
Transform::Spec<alpaka::api::Host, alpaka::deviceKind::Cpu>,
46+
auto&& queue,
47+
alpaka::concepts::IMdSpan auto&& output,
48+
auto&& binaryOp,
49+
alpaka::concepts::IMdSpan auto&& input0,
50+
alpaka::concepts::IMdSpan auto&& input1)
51+
{
52+
std::cout << "call std::transform" << std::endl;
53+
// ensure the pointer is non const, capturing the span results into const mdspan within the const lambda
54+
auto outPtr = output.data();
55+
queue.enqueueHostFn(
56+
[=]()
57+
{
58+
std::transform(
59+
input0.data(),
60+
input0.data() + input0.getExtents().x(),
61+
input1.data(),
62+
outPtr,
63+
binaryOp);
64+
});
65+
}
66+
} // namespace vendorExample

0 commit comments

Comments
 (0)