Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 76 additions & 46 deletions sycl/include/sycl/detail/cg_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ inline namespace _V1 {
class interop_handle;
class handler;
namespace detail {
// Prevent argument from being removed by the optimized. Needed for different
// host functions referencing kernel that we instantiate but don't intend to
// call on host (e.g. to preserve symbols for the debugger).
__SYCL_EXPORT bool do_not_dce(void (*)(void *));
class HostTask;

/// Type of the command group.
Expand Down Expand Up @@ -163,6 +167,55 @@ class HostKernelBase {
virtual void InstantiateKernelOnHost() = 0;
};

template <class KernelType, class KernelArgType, int Dims>
void InstantiateKernelOnHost(void *p) {
auto &MKernel = *static_cast<KernelType *>(p);
using IDBuilder = sycl::detail::Builder;
if constexpr (std::is_same_v<KernelArgType, void>) {
runKernelWithoutArg(MKernel);
} else if constexpr (std::is_same_v<KernelArgType, sycl::id<Dims>>) {
sycl::id ID = InitializedVal<Dims, id>::template get<0>();
runKernelWithArg<const KernelArgType &>(MKernel, ID);
} else if constexpr (std::is_same_v<KernelArgType, item<Dims, true>> ||
std::is_same_v<KernelArgType, item<Dims, false>>) {
constexpr bool HasOffset = std::is_same_v<KernelArgType, item<Dims, true>>;
if constexpr (!HasOffset) {
KernelArgType Item = IDBuilder::createItem<Dims, HasOffset>(
InitializedVal<Dims, range>::template get<1>(),
InitializedVal<Dims, id>::template get<0>());
runKernelWithArg<KernelArgType>(MKernel, Item);
} else {
KernelArgType Item = IDBuilder::createItem<Dims, HasOffset>(
InitializedVal<Dims, range>::template get<1>(),
InitializedVal<Dims, id>::template get<0>(),
InitializedVal<Dims, id>::template get<0>());
runKernelWithArg<KernelArgType>(MKernel, Item);
}
} else if constexpr (std::is_same_v<KernelArgType, nd_item<Dims>>) {
sycl::range<Dims> Range = InitializedVal<Dims, range>::template get<1>();
sycl::id<Dims> ID = InitializedVal<Dims, id>::template get<0>();
sycl::group<Dims> Group =
IDBuilder::createGroup<Dims>(Range, Range, Range, ID);
sycl::item<Dims, true> GlobalItem =
IDBuilder::createItem<Dims, true>(Range, ID, ID);
sycl::item<Dims, false> LocalItem =
IDBuilder::createItem<Dims, false>(Range, ID);
KernelArgType NDItem =
IDBuilder::createNDItem<Dims>(GlobalItem, LocalItem, Group);
runKernelWithArg<const KernelArgType>(MKernel, NDItem);
} else if constexpr (std::is_same_v<KernelArgType, sycl::group<Dims>>) {
sycl::range<Dims> Range = InitializedVal<Dims, range>::template get<1>();
sycl::id<Dims> ID = InitializedVal<Dims, id>::template get<0>();
KernelArgType Group = IDBuilder::createGroup<Dims>(Range, Range, Range, ID);
runKernelWithArg<KernelArgType>(MKernel, Group);
} else {
// Assume that anything else can be default-constructed. If not, this
// should fail to compile and the implementor should implement a generic
// case for the new argument type.
runKernelWithArg<KernelArgType>(MKernel, KernelArgType{});
}
}

// Class which stores specific lambda object.
template <class KernelType, class KernelArgType, int Dims>
class HostKernel : public HostKernelBase {
Expand All @@ -181,55 +234,32 @@ class HostKernel : public HostKernelBase {
// kernel code instructions with source code lines.
// NOTE: InstatiateKernelOnHost() should not be called.
void InstantiateKernelOnHost() override {
using IDBuilder = sycl::detail::Builder;
if constexpr (std::is_same_v<KernelArgType, void>) {
runKernelWithoutArg(MKernel);
} else if constexpr (std::is_same_v<KernelArgType, sycl::id<Dims>>) {
sycl::id ID = InitializedVal<Dims, id>::template get<0>();
runKernelWithArg<const KernelArgType &>(MKernel, ID);
} else if constexpr (std::is_same_v<KernelArgType, item<Dims, true>> ||
std::is_same_v<KernelArgType, item<Dims, false>>) {
constexpr bool HasOffset =
std::is_same_v<KernelArgType, item<Dims, true>>;
if constexpr (!HasOffset) {
KernelArgType Item = IDBuilder::createItem<Dims, HasOffset>(
InitializedVal<Dims, range>::template get<1>(),
InitializedVal<Dims, id>::template get<0>());
runKernelWithArg<KernelArgType>(MKernel, Item);
} else {
KernelArgType Item = IDBuilder::createItem<Dims, HasOffset>(
InitializedVal<Dims, range>::template get<1>(),
InitializedVal<Dims, id>::template get<0>(),
InitializedVal<Dims, id>::template get<0>());
runKernelWithArg<KernelArgType>(MKernel, Item);
}
} else if constexpr (std::is_same_v<KernelArgType, nd_item<Dims>>) {
sycl::range<Dims> Range = InitializedVal<Dims, range>::template get<1>();
sycl::id<Dims> ID = InitializedVal<Dims, id>::template get<0>();
sycl::group<Dims> Group =
IDBuilder::createGroup<Dims>(Range, Range, Range, ID);
sycl::item<Dims, true> GlobalItem =
IDBuilder::createItem<Dims, true>(Range, ID, ID);
sycl::item<Dims, false> LocalItem =
IDBuilder::createItem<Dims, false>(Range, ID);
KernelArgType NDItem =
IDBuilder::createNDItem<Dims>(GlobalItem, LocalItem, Group);
runKernelWithArg<const KernelArgType>(MKernel, NDItem);
} else if constexpr (std::is_same_v<KernelArgType, sycl::group<Dims>>) {
sycl::range<Dims> Range = InitializedVal<Dims, range>::template get<1>();
sycl::id<Dims> ID = InitializedVal<Dims, id>::template get<0>();
KernelArgType Group =
IDBuilder::createGroup<Dims>(Range, Range, Range, ID);
runKernelWithArg<KernelArgType>(MKernel, Group);
} else {
// Assume that anything else can be default-constructed. If not, this
// should fail to compile and the implementor should implement a generic
// case for the new argument type.
runKernelWithArg<KernelArgType>(MKernel, KernelArgType{});
}
detail::InstantiateKernelOnHost<KernelType, KernelArgType, Dims>(&MKernel);
}
};

class SimpleHostKernel : public HostKernelBase {
std::unique_ptr<char[]> KernelBytes;

public:
template <typename KernelType, typename KernelArgType, int Dims>
SimpleHostKernel(const KernelType &KernelFunc, KernelArgType *, std::integral_constant<int, Dims>)
: KernelBytes(new(
std::align_val_t(alignof(KernelType))) char[sizeof(KernelType)]) {
std::memcpy(KernelBytes.get(), &KernelFunc, sizeof(KernelType));
// Hopefully, minimal run-time overhead:
static thread_local auto ignore = do_not_dce(
&detail::InstantiateKernelOnHost<KernelType, KernelArgType, Dims>);
(void)ignore;
}
char *getPtr() override { return KernelBytes.get(); }
~SimpleHostKernel() override = default;
void InstantiateKernelOnHost() override {
// We do this in the ctor instead.
}
};


} // namespace detail
} // namespace _V1
} // namespace sycl
20 changes: 18 additions & 2 deletions sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -739,8 +739,24 @@ class __SYCL_EXPORT handler {
// Not using `std::make_unique` to avoid unnecessary instantiations of
// `std::unique_ptr<HostKernel<...>>`. Only
// `std::unique_ptr<HostKernelBase>` is necessary.
MHostKernel.reset(new detail::HostKernel<KernelType, LambdaArgType, Dims>(
std::forward<KernelTypeUniversalRef>(KernelFunc)));

#if __has_builtin(__is_trivially_copyable) && \
__has_builtin(__is_trivially_destructible)
// libstdc++ implementation of std::is_trivially_[copyable|destructible]_v
// has more compile time impact than the benefits of using single
// `SimpleHostKernel` vs. intstantiating `HostKernel` for each
// `KernelFunc`. As such, only do this optimization of the compilation time
// when using `clang` as host compiler.
if constexpr (__is_trivially_copyable(KernelType) &&
__is_trivially_destructible(KernelType))
MHostKernel.reset(new detail::SimpleHostKernel(
std::forward<KernelTypeUniversalRef>(KernelFunc),
static_cast<LambdaArgType *>(nullptr),
std::integral_constant<int, Dims>{}));
else
#endif
MHostKernel.reset(new detail::HostKernel<KernelType, LambdaArgType, Dims>(
std::forward<KernelTypeUniversalRef>(KernelFunc)));

constexpr bool KernelHasName =
detail::getKernelName<KernelName>() != nullptr &&
Expand Down
1 change: 1 addition & 0 deletions sycl/source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ set(SYCL_COMMON_SOURCES
"builtins/math_functions.cpp"
"builtins/native_math_functions.cpp"
"builtins/relational_functions.cpp"
"cg_types.cpp"
"detail/accessor_impl.cpp"
"detail/allowlist.cpp"
"detail/bindless_images.cpp"
Expand Down
18 changes: 18 additions & 0 deletions sycl/source/cg_types.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
//==---- cg_types.cpp - Auxiliary types required by command group class ----==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <sycl/detail/cg_types.hpp>

namespace sycl {
inline namespace _V1 {

namespace detail {
__SYCL_EXPORT bool do_not_dce(void (*)(void *)) { return true; }
} // namespace detail
} // namespace _V1
} // namespace sycl
Loading