Skip to content

Commit 40022ca

Browse files
authored
[SYCL][RTC] Ensure template kernel instantiation (#16138)
As a step towards feature parity between the `sycl` and `sycl_jit` runtime-compilation (RTC) implementations, this PR cherry-picks @cperkinsintel's workaround from #16305 for triggering explicit template instantiations.

---------

Signed-off-by: Julian Oppermann <[email protected]>
1 parent 906a9a3 commit 40022ca

File tree

2 files changed

+39
-11
lines changed

2 files changed

+39
-11
lines changed

sycl/source/detail/jit_compiler.cpp

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1211,17 +1211,21 @@ sycl_device_binaries jit_compiler::compileSYCL(
12111211
const std::vector<std::string> &UserArgs, std::string *LogPtr,
12121212
const std::vector<std::string> &RegisteredKernelNames) {
12131213

1214-
// TODO: Handle template instantiation.
1215-
if (!RegisteredKernelNames.empty()) {
1216-
throw sycl::exception(
1217-
sycl::errc::build,
1218-
"Property `sycl::ext::oneapi::experimental::registered_kernel_names` "
1219-
"is not yet supported for the `sycl_jit` source language");
1214+
// RegisteredKernelNames may contain template specializations, so we just put
1215+
// them in main() which ensures they are instantiated.
1216+
std::ostringstream ss;
1217+
ss << SYCLSource << '\n';
1218+
ss << "int main() {\n";
1219+
for (const std::string &KernelName : RegisteredKernelNames) {
1220+
ss << " (void)" << KernelName << ";\n";
12201221
}
1222+
ss << " return 0;\n}\n" << std::endl;
1223+
1224+
std::string FinalSource = ss.str();
12211225

12221226
std::string SYCLFileName = Id + ".cpp";
12231227
::jit_compiler::InMemoryFile SourceFile{SYCLFileName.c_str(),
1224-
SYCLSource.c_str()};
1228+
FinalSource.c_str()};
12251229

12261230
std::vector<::jit_compiler::InMemoryFile> IncludeFilesView;
12271231
IncludeFilesView.reserve(IncludePairs.size());

sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,17 @@ void ff_cp(int *ptr, int *unused) {
6464
sycl::id<1> GId = Item.get_global_id();
6565
ptr[GId.get(0)] = AddEm(GId.get(0), 37);
6666
}
67+
68+
// this name will be mangled
69+
template <typename T>
70+
SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((sycl::ext::oneapi::experimental::nd_range_kernel<1>))
71+
void ff_templated(T *ptr, T *unused) {
72+
73+
sycl::nd_item<1> Item = sycl::ext::oneapi::this_work_item::get_nd_item<1>();
74+
75+
sycl::id<1> GId = Item.get_global_id();
76+
ptr[GId.get(0)] = PlusEm(GId.get(0), 38);
77+
}
6778
)===";
6879

6980
void test_1(sycl::queue &Queue, sycl::kernel &Kernel, int seed) {
@@ -126,19 +137,32 @@ int test_build_and_run() {
126137
// Compilation of empty prop list, no devices.
127138
exe_kb kbExe1 = syclex::build(kbSrc);
128139

129-
// // Compilation with props and devices
140+
// Compilation with props and devices
130141
std::string log;
131142
std::vector<std::string> flags{"-g", "-fno-fast-math",
132143
"-fsycl-instrument-device-code"};
133144
std::vector<sycl::device> devs = kbSrc.get_devices();
134145
exe_kb kbExe2 = syclex::build(
135-
kbSrc, devs, syclex::properties{syclex::build_options{flags}});
146+
kbSrc, devs,
147+
syclex::properties{syclex::build_options{flags}, syclex::save_log{&log},
148+
syclex::registered_kernel_names{"ff_templated<int>"}});
136149

137-
// extern "C" was used, so the name "ff_cp" is not mangled.
150+
// extern "C" was used, so the name "ff_cp" is not mangled and can be used
151+
// directly.
138152
sycl::kernel k = kbExe2.ext_oneapi_get_kernel("ff_cp");
139153

154+
// The templated function name will have been mangled. Mapping from original
155+
// name to mangled name is not yet supported, so we cannot do this yet:
156+
// sycl::kernel k2 = kbExe2.ext_oneapi_get_kernel("ff_templated<int>");
157+
158+
// Instead, we can TEMPORARILY use the mangled name. Once demangling is
159+
// supported, this might no longer work.
160+
sycl::kernel k2 =
161+
kbExe2.ext_oneapi_get_kernel("_Z26__sycl_kernel_ff_templatedIiEvPT_S1_");
162+
140163
// Test the kernels.
141-
test_1(q, k, 37 + 5); // ff_cp seeds 37. AddEm will add 5 more.
164+
test_1(q, k, 37 + 5); // ff_cp seeds 37. AddEm will add 5 more.
165+
test_1(q, k2, 38 + 6); // ff_templated seeds 38. PlusEm adds 6 more.
142166

143167
return 0;
144168
}

0 commit comments

Comments
 (0)