Skip to content

Commit 662d3b5

Browse files
authored
[SYCL] Fix OpenCL C to spirv kernel_compiler for the multi-device case (#15099)
Currently if there is only single device in the context then kernel compiler passes ip version of that device via -device option to ocloc when compiling OpenCL program to spirv to let ocloc enable all extensions supported by that device. Problem is that ocloc -spv_only doesn't produce spirv file when multiple devices are provided via -device option. That's why in this case enable common extensions supported by all devices manually. To do that use ocloc query to get common supported features for the list of devices and then process the return and enable features via ocloc -internal_options -cl-ext=+feature1,...
1 parent de8aefb commit 662d3b5

File tree

2 files changed

+171
-68
lines changed

2 files changed

+171
-68
lines changed

sycl/source/detail/kernel_compiler/kernel_compiler_opencl.cpp

Lines changed: 141 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#include <cstring> // strlen
1818
#include <numeric> // for std::accumulate
19+
#include <regex>
1920
#include <sstream>
2021

2122
namespace sycl {
@@ -130,6 +131,64 @@ std::string IPVersionsToString(const std::vector<uint32_t> IPVersionVec) {
130131
return ss.str();
131132
}
132133

134+
std::string InvokeOclocQuery(const std::vector<uint32_t> &IPVersionVec,
135+
const char *identifier) {
136+
137+
std::string QueryLog = "";
138+
139+
// handles into ocloc shared lib
140+
static void *oclocInvokeHandle = nullptr;
141+
static void *oclocFreeOutputHandle = nullptr;
142+
std::error_code the_errc = make_error_code(errc::runtime);
143+
144+
SetupLibrary(oclocInvokeHandle, oclocFreeOutputHandle, the_errc);
145+
146+
uint32_t NumOutputs = 0;
147+
uint8_t **Outputs = nullptr;
148+
uint64_t *OutputLengths = nullptr;
149+
char **OutputNames = nullptr;
150+
151+
std::vector<const char *> Args = {"ocloc", "query"};
152+
std::string IPVersionsStr = IPVersionsToString(IPVersionVec);
153+
if (!IPVersionsStr.empty()) {
154+
Args.push_back("-device");
155+
Args.push_back(IPVersionsStr.c_str());
156+
}
157+
Args.push_back(identifier);
158+
159+
decltype(::oclocInvoke) *OclocInvokeFunc =
160+
reinterpret_cast<decltype(::oclocInvoke) *>(oclocInvokeHandle);
161+
162+
int InvokeError = OclocInvokeFunc(
163+
Args.size(), Args.data(), 0, nullptr, 0, nullptr, 0, nullptr, nullptr,
164+
nullptr, &NumOutputs, &Outputs, &OutputLengths, &OutputNames);
165+
166+
// Gather the results.
167+
for (uint32_t i = 0; i < NumOutputs; i++) {
168+
if (!strcmp(OutputNames[i], "stdout.log")) {
169+
if (OutputLengths[i] > 0) {
170+
const char *LogText = reinterpret_cast<const char *>(Outputs[i]);
171+
QueryLog.append(LogText, OutputLengths[i]);
172+
}
173+
}
174+
}
175+
176+
// Try to free memory before reporting possible error.
177+
decltype(::oclocFreeOutput) *OclocFreeOutputFunc =
178+
reinterpret_cast<decltype(::oclocFreeOutput) *>(oclocFreeOutputHandle);
179+
int MemFreeError =
180+
OclocFreeOutputFunc(&NumOutputs, &Outputs, &OutputLengths, &OutputNames);
181+
182+
if (InvokeError)
183+
throw sycl::exception(the_errc,
184+
"ocloc reported errors: {\n" + QueryLog + "\n}");
185+
186+
if (MemFreeError)
187+
throw sycl::exception(the_errc, "ocloc cannot safely free resources");
188+
189+
return QueryLog;
190+
}
191+
133192
spirv_vec_t OpenCLC_to_SPIRV(const std::string &Source,
134193
const std::vector<uint32_t> &IPVersionVec,
135194
const std::vector<std::string> &UserArgs,
@@ -167,13 +226,85 @@ spirv_vec_t OpenCLC_to_SPIRV(const std::string &Source,
167226
Args.push_back("-file");
168227
Args.push_back(SourceName);
169228

170-
// device
171-
std::string IPVersionsStr = IPVersionsToString(IPVersionVec);
172-
if (!IPVersionsStr.empty()) {
173-
Args.push_back("-device");
174-
Args.push_back(IPVersionsStr.c_str());
175-
}
229+
std::string IPVersionsStr;
230+
std::string OpenCLCFeaturesOption;
231+
std::string ExtensionsOption;
232+
std::string VersionOption;
233+
auto hasSingleDeviceOrSameDevices = [](auto &IPVersionVec) -> bool {
234+
auto IPVersion = IPVersionVec.begin();
235+
for (auto IPVersionItem = ++std::begin(IPVersionVec);
236+
IPVersionItem != std::end(IPVersionVec); IPVersionItem++)
237+
if (*IPVersionItem != *IPVersion)
238+
return false;
239+
240+
return true;
241+
};
242+
243+
assert(IPVersionVec.size() >= 1 &&
244+
"At least one device must be provided to build_from_source(...).");
245+
if (hasSingleDeviceOrSameDevices(IPVersionVec)) {
246+
// If we have a single device (or all devices are the same) then pass it
247+
// through -device option to enable all extensions supported by that device.
248+
IPVersionsStr = IPVersionsToString({IPVersionVec.at(0)});
249+
if (!IPVersionsStr.empty()) {
250+
Args.push_back("-device");
251+
Args.push_back(IPVersionsStr.c_str());
252+
}
253+
} else {
254+
// Currently ocloc -spv_only doesn't produce spirv file when multiple
255+
// devices are provided via -device option. That's why in this case we have
256+
// to enable common extensions supported by all devices manually.
257+
258+
// Find maximum opencl version supported by all devices in IPVersionVec.
259+
auto OpenCLVersions =
260+
InvokeOclocQuery(IPVersionVec, "CL_DEVICE_OPENCL_C_ALL_VERSIONS");
261+
const std::regex VersionRegEx("[0-9].[0-9].[0-9]");
262+
std::string const &(*max)(std::string const &, std::string const &) =
263+
std::max<std::string>;
264+
auto MaxVersion = std::accumulate(
265+
std::sregex_token_iterator(OpenCLVersions.begin(), OpenCLVersions.end(),
266+
VersionRegEx),
267+
std::sregex_token_iterator(), std::string("0.0.0"), max);
268+
269+
// Find common extensions supported by all devices in IPVersionVec.
270+
// Lambda to accumulate extensions in the format +extension1,+extension2...
271+
// to pass to ocloc as an option.
272+
auto Accum = [](const std::string &acc, const std::string &s) {
273+
return acc + (acc.empty() ? "+" : ",+") + s;
274+
};
275+
276+
// If OpenCL version is higher that 3.0.0 then we need to enable OpenCL C
277+
// features as well in addition to CL extensions.
278+
if (MaxVersion >= "3.0.0") {
279+
// construct a string which enables common extensions supported by
280+
// devices.
281+
auto OpenCLCFeatures =
282+
InvokeOclocQuery(IPVersionVec, "CL_DEVICE_OPENCL_C_FEATURES");
283+
const std::regex OpenCLCRegEx("__opencl_c_[^:]+");
284+
auto OpenCLCFeaturesValue = std::accumulate(
285+
std::sregex_token_iterator(OpenCLCFeatures.begin(),
286+
OpenCLCFeatures.end(), OpenCLCRegEx),
287+
std::sregex_token_iterator(), std::string(""), Accum);
288+
if (OpenCLCFeaturesValue.size()) {
289+
OpenCLCFeaturesOption = "-cl-ext=" + OpenCLCFeaturesValue;
290+
Args.push_back("-internal_options");
291+
Args.push_back(OpenCLCFeaturesOption.c_str());
292+
}
293+
}
176294

295+
// Accumulate CL extensions into an option.
296+
auto Extensions = InvokeOclocQuery(IPVersionVec, "CL_DEVICE_EXTENSIONS");
297+
const std::regex CLRegEx("cl_[^\\s]+");
298+
auto ExtensionsValue =
299+
std::accumulate(std::sregex_token_iterator(Extensions.begin(),
300+
Extensions.end(), CLRegEx),
301+
std::sregex_token_iterator(), std::string(""), Accum);
302+
if (ExtensionsValue.size()) {
303+
ExtensionsOption = "-cl-ext=" + ExtensionsValue;
304+
Args.push_back("-internal_options");
305+
Args.push_back(ExtensionsOption.c_str());
306+
}
307+
}
177308
// invoke
178309
decltype(::oclocInvoke) *OclocInvokeFunc =
179310
reinterpret_cast<decltype(::oclocInvoke) *>(oclocInvokeHandle);
@@ -221,69 +352,11 @@ spirv_vec_t OpenCLC_to_SPIRV(const std::string &Source,
221352
return SpirV;
222353
}
223354

224-
std::string InvokeOclocQuery(uint32_t IPVersion, const char *identifier) {
225-
226-
std::string QueryLog = "";
227-
228-
// handles into ocloc shared lib
229-
static void *oclocInvokeHandle = nullptr;
230-
static void *oclocFreeOutputHandle = nullptr;
231-
std::error_code the_errc = make_error_code(errc::runtime);
232-
233-
SetupLibrary(oclocInvokeHandle, oclocFreeOutputHandle, the_errc);
234-
235-
uint32_t NumOutputs = 0;
236-
uint8_t **Outputs = nullptr;
237-
uint64_t *OutputLengths = nullptr;
238-
char **OutputNames = nullptr;
239-
240-
std::vector<const char *> Args = {"ocloc", "query"};
241-
std::vector<uint32_t> IPVersionVec{IPVersion};
242-
std::string IPVersionsStr = IPVersionsToString(IPVersionVec);
243-
if (!IPVersionsStr.empty()) {
244-
Args.push_back("-device");
245-
Args.push_back(IPVersionsStr.c_str());
246-
}
247-
Args.push_back(identifier);
248-
249-
decltype(::oclocInvoke) *OclocInvokeFunc =
250-
reinterpret_cast<decltype(::oclocInvoke) *>(oclocInvokeHandle);
251-
252-
int InvokeError = OclocInvokeFunc(
253-
Args.size(), Args.data(), 0, nullptr, 0, nullptr, 0, nullptr, nullptr,
254-
nullptr, &NumOutputs, &Outputs, &OutputLengths, &OutputNames);
255-
256-
// Gather the results.
257-
for (uint32_t i = 0; i < NumOutputs; i++) {
258-
if (!strcmp(OutputNames[i], "stdout.log")) {
259-
if (OutputLengths[i] > 0) {
260-
const char *LogText = reinterpret_cast<const char *>(Outputs[i]);
261-
QueryLog.append(LogText, OutputLengths[i]);
262-
}
263-
}
264-
}
265-
266-
// Try to free memory before reporting possible error.
267-
decltype(::oclocFreeOutput) *OclocFreeOutputFunc =
268-
reinterpret_cast<decltype(::oclocFreeOutput) *>(oclocFreeOutputHandle);
269-
int MemFreeError =
270-
OclocFreeOutputFunc(&NumOutputs, &Outputs, &OutputLengths, &OutputNames);
271-
272-
if (InvokeError)
273-
throw sycl::exception(the_errc,
274-
"ocloc reported errors: {\n" + QueryLog + "\n}");
275-
276-
if (MemFreeError)
277-
throw sycl::exception(the_errc, "ocloc cannot safely free resources");
278-
279-
return QueryLog;
280-
}
281-
282355
bool OpenCLC_Feature_Available(const std::string &Feature, uint32_t IPVersion) {
283356
static std::string FeatureLog = "";
284357
if (FeatureLog.empty()) {
285358
try {
286-
FeatureLog = InvokeOclocQuery(IPVersion, "CL_DEVICE_OPENCL_C_FEATURES");
359+
FeatureLog = InvokeOclocQuery({IPVersion}, "CL_DEVICE_OPENCL_C_FEATURES");
287360
} catch (sycl::exception &) {
288361
return false;
289362
}
@@ -299,7 +372,7 @@ bool OpenCLC_Supports_Version(
299372
if (VersionLog.empty()) {
300373
try {
301374
VersionLog =
302-
InvokeOclocQuery(IPVersion, "CL_DEVICE_OPENCL_C_ALL_VERSIONS");
375+
InvokeOclocQuery({IPVersion}, "CL_DEVICE_OPENCL_C_ALL_VERSIONS");
303376
} catch (sycl::exception &) {
304377
return false;
305378
}
@@ -320,7 +393,7 @@ bool OpenCLC_Supports_Extension(
320393
if (ExtensionByVersionLog.empty()) {
321394
try {
322395
ExtensionByVersionLog =
323-
InvokeOclocQuery(IPVersion, "CL_DEVICE_EXTENSIONS_WITH_VERSION");
396+
InvokeOclocQuery({IPVersion}, "CL_DEVICE_EXTENSIONS_WITH_VERSION");
324397
} catch (sycl::exception &) {
325398
return false;
326399
}
@@ -371,7 +444,7 @@ bool OpenCLC_Supports_Extension(
371444

372445
std::string OpenCLC_Profile(uint32_t IPVersion) {
373446
try {
374-
std::string result = InvokeOclocQuery(IPVersion, "CL_DEVICE_PROFILE");
447+
std::string result = InvokeOclocQuery({IPVersion}, "CL_DEVICE_PROFILE");
375448
// NOTE: result has \n\n amended. Clean it up.
376449
// TODO: remove this once the ocloc query is fixed.
377450
result.erase(std::remove_if(result.begin(), result.end(),
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// REQUIRES: (opencl || level_zero)
2+
// RUN: %{build} -o %t.out
3+
// RUN: env NEOReadDebugKeys=1 CreateMultipleRootDevices=3 %{run} %t.out
4+
5+
#include <sycl/detail/core.hpp>
6+
7+
// Test to check that bundle is buildable from OpenCL source if there are
8+
// multiple devices in the context.
9+
10+
auto constexpr CLSource = R"===(
11+
__kernel void Kernel1(int in, __global int *out) {
12+
out[0] = in;
13+
}
14+
15+
__kernel void Kernel2(short in, __global short *out) {
16+
out[0] = in;
17+
}
18+
)===";
19+
20+
int main() {
21+
sycl::platform Platform;
22+
auto Context = Platform.ext_oneapi_get_default_context();
23+
24+
auto SourceKB =
25+
sycl::ext::oneapi::experimental::create_kernel_bundle_from_source(
26+
Context, sycl::ext::oneapi::experimental::source_language::opencl,
27+
CLSource);
28+
auto ExecKB = sycl::ext::oneapi::experimental::build(SourceKB);
29+
return 0;
30+
}

0 commit comments

Comments
 (0)