@@ -34,6 +34,10 @@ inline namespace _V1 {
3434class interop_handle ;
3535class handler ;
3636namespace detail {
37+ // Prevent argument from being removed by the optimized. Needed for different
38+ // host functions referencing kernel that we instantiate but don't intend to
39+ // call on host (e.g. to preserve symbols for the debugger).
40+ __SYCL_EXPORT bool do_not_dce (void (*)(void *));
3741class HostTask ;
3842
3943// / Type of the command group.
@@ -163,6 +167,55 @@ class HostKernelBase {
163167 virtual void InstantiateKernelOnHost () = 0;
164168};
165169
170+ template <class KernelType , class KernelArgType , int Dims>
171+ void InstantiateKernelOnHost (void *p) {
172+ auto &MKernel = *static_cast <KernelType *>(p);
173+ using IDBuilder = sycl::detail::Builder;
174+ if constexpr (std::is_same_v<KernelArgType, void >) {
175+ runKernelWithoutArg (MKernel);
176+ } else if constexpr (std::is_same_v<KernelArgType, sycl::id<Dims>>) {
177+ sycl::id ID = InitializedVal<Dims, id>::template get<0 >();
178+ runKernelWithArg<const KernelArgType &>(MKernel, ID);
179+ } else if constexpr (std::is_same_v<KernelArgType, item<Dims, true >> ||
180+ std::is_same_v<KernelArgType, item<Dims, false >>) {
181+ constexpr bool HasOffset = std::is_same_v<KernelArgType, item<Dims, true >>;
182+ if constexpr (!HasOffset) {
183+ KernelArgType Item = IDBuilder::createItem<Dims, HasOffset>(
184+ InitializedVal<Dims, range>::template get<1 >(),
185+ InitializedVal<Dims, id>::template get<0 >());
186+ runKernelWithArg<KernelArgType>(MKernel, Item);
187+ } else {
188+ KernelArgType Item = IDBuilder::createItem<Dims, HasOffset>(
189+ InitializedVal<Dims, range>::template get<1 >(),
190+ InitializedVal<Dims, id>::template get<0 >(),
191+ InitializedVal<Dims, id>::template get<0 >());
192+ runKernelWithArg<KernelArgType>(MKernel, Item);
193+ }
194+ } else if constexpr (std::is_same_v<KernelArgType, nd_item<Dims>>) {
195+ sycl::range<Dims> Range = InitializedVal<Dims, range>::template get<1 >();
196+ sycl::id<Dims> ID = InitializedVal<Dims, id>::template get<0 >();
197+ sycl::group<Dims> Group =
198+ IDBuilder::createGroup<Dims>(Range, Range, Range, ID);
199+ sycl::item<Dims, true > GlobalItem =
200+ IDBuilder::createItem<Dims, true >(Range, ID, ID);
201+ sycl::item<Dims, false > LocalItem =
202+ IDBuilder::createItem<Dims, false >(Range, ID);
203+ KernelArgType NDItem =
204+ IDBuilder::createNDItem<Dims>(GlobalItem, LocalItem, Group);
205+ runKernelWithArg<const KernelArgType>(MKernel, NDItem);
206+ } else if constexpr (std::is_same_v<KernelArgType, sycl::group<Dims>>) {
207+ sycl::range<Dims> Range = InitializedVal<Dims, range>::template get<1 >();
208+ sycl::id<Dims> ID = InitializedVal<Dims, id>::template get<0 >();
209+ KernelArgType Group = IDBuilder::createGroup<Dims>(Range, Range, Range, ID);
210+ runKernelWithArg<KernelArgType>(MKernel, Group);
211+ } else {
212+ // Assume that anything else can be default-constructed. If not, this
213+ // should fail to compile and the implementor should implement a generic
214+ // case for the new argument type.
215+ runKernelWithArg<KernelArgType>(MKernel, KernelArgType{});
216+ }
217+ }
218+
166219// Class which stores specific lambda object.
167220template <class KernelType , class KernelArgType , int Dims>
168221class HostKernel : public HostKernelBase {
@@ -181,55 +234,32 @@ class HostKernel : public HostKernelBase {
181234 // kernel code instructions with source code lines.
182235 // NOTE: InstatiateKernelOnHost() should not be called.
183236 void InstantiateKernelOnHost () override {
184- using IDBuilder = sycl::detail::Builder;
185- if constexpr (std::is_same_v<KernelArgType, void >) {
186- runKernelWithoutArg (MKernel);
187- } else if constexpr (std::is_same_v<KernelArgType, sycl::id<Dims>>) {
188- sycl::id ID = InitializedVal<Dims, id>::template get<0 >();
189- runKernelWithArg<const KernelArgType &>(MKernel, ID);
190- } else if constexpr (std::is_same_v<KernelArgType, item<Dims, true >> ||
191- std::is_same_v<KernelArgType, item<Dims, false >>) {
192- constexpr bool HasOffset =
193- std::is_same_v<KernelArgType, item<Dims, true >>;
194- if constexpr (!HasOffset) {
195- KernelArgType Item = IDBuilder::createItem<Dims, HasOffset>(
196- InitializedVal<Dims, range>::template get<1 >(),
197- InitializedVal<Dims, id>::template get<0 >());
198- runKernelWithArg<KernelArgType>(MKernel, Item);
199- } else {
200- KernelArgType Item = IDBuilder::createItem<Dims, HasOffset>(
201- InitializedVal<Dims, range>::template get<1 >(),
202- InitializedVal<Dims, id>::template get<0 >(),
203- InitializedVal<Dims, id>::template get<0 >());
204- runKernelWithArg<KernelArgType>(MKernel, Item);
205- }
206- } else if constexpr (std::is_same_v<KernelArgType, nd_item<Dims>>) {
207- sycl::range<Dims> Range = InitializedVal<Dims, range>::template get<1 >();
208- sycl::id<Dims> ID = InitializedVal<Dims, id>::template get<0 >();
209- sycl::group<Dims> Group =
210- IDBuilder::createGroup<Dims>(Range, Range, Range, ID);
211- sycl::item<Dims, true > GlobalItem =
212- IDBuilder::createItem<Dims, true >(Range, ID, ID);
213- sycl::item<Dims, false > LocalItem =
214- IDBuilder::createItem<Dims, false >(Range, ID);
215- KernelArgType NDItem =
216- IDBuilder::createNDItem<Dims>(GlobalItem, LocalItem, Group);
217- runKernelWithArg<const KernelArgType>(MKernel, NDItem);
218- } else if constexpr (std::is_same_v<KernelArgType, sycl::group<Dims>>) {
219- sycl::range<Dims> Range = InitializedVal<Dims, range>::template get<1 >();
220- sycl::id<Dims> ID = InitializedVal<Dims, id>::template get<0 >();
221- KernelArgType Group =
222- IDBuilder::createGroup<Dims>(Range, Range, Range, ID);
223- runKernelWithArg<KernelArgType>(MKernel, Group);
224- } else {
225- // Assume that anything else can be default-constructed. If not, this
226- // should fail to compile and the implementor should implement a generic
227- // case for the new argument type.
228- runKernelWithArg<KernelArgType>(MKernel, KernelArgType{});
229- }
237+ detail::InstantiateKernelOnHost<KernelType, KernelArgType, Dims>(&MKernel);
230238 }
231239};
232240
241+ class SimpleHostKernel : public HostKernelBase {
242+ std::unique_ptr<char []> KernelBytes;
243+
244+ public:
245+ template <typename KernelType, typename KernelArgType, int Dims>
246+ SimpleHostKernel (const KernelType &KernelFunc, KernelArgType *, std::integral_constant<int , Dims>)
247+ : KernelBytes(new (
248+ std::align_val_t (alignof (KernelType))) char[sizeof(KernelType)]) {
249+ std::memcpy (KernelBytes.get (), &KernelFunc, sizeof (KernelType));
250+ // Hopefully, minimal run-time overhead:
251+ static thread_local auto ignore = do_not_dce (
252+ &detail::InstantiateKernelOnHost<KernelType, KernelArgType, Dims>);
253+ (void )ignore;
254+ }
255+ char *getPtr () override { return KernelBytes.get (); }
256+ ~SimpleHostKernel () override = default ;
257+ void InstantiateKernelOnHost () override {
258+ // We do this in the ctor instead.
259+ }
260+ };
261+
262+
233263} // namespace detail
234264} // namespace _V1
235265} // namespace sycl
0 commit comments