Skip to content

Commit 0d5dc84

Browse files
[SYCL] Align is_compatible with compile_target checks (#16060)
is_compatible used a different logic compared to the checks used in the standard image selection path. This change makes both use a single code path, which adds compile_target related checks to is_compatible.
1 parent 4157203 commit 0d5dc84

File tree

5 files changed

+83
-66
lines changed

5 files changed

+83
-66
lines changed

sycl/source/detail/compiler.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@
3131
#define __SYCL_DEVICE_BINARY_TARGET_NVPTX64 "nvptx64"
3232
#define __SYCL_DEVICE_BINARY_TARGET_AMDGCN "amdgcn"
3333
#define __SYCL_DEVICE_BINARY_TARGET_NATIVE_CPU "native_cpu"
34+
// JIT compilation targets for CUDA & HIP devices.
35+
#define __SYCL_DEVICE_BINARY_TARGET_LLVM_NVPTX64 "llvm_nvptx64"
36+
#define __SYCL_DEVICE_BINARY_TARGET_LLVM_AMDGCN "llvm_amdgcn"
3437

3538
/// Device binary image property set names recognized by the SYCL runtime.
3639
/// Name must be consistent with

sycl/source/detail/program_manager/program_manager.cpp

Lines changed: 65 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1341,7 +1341,7 @@ void CheckJITCompilationForImage(const RTDeviceBinaryImage *const &Image,
13411341

13421342
const char *getArchName(const device &Device) {
13431343
namespace syclex = sycl::ext::oneapi::experimental;
1344-
auto Arch = Device.get_info<syclex::info::device::architecture>();
1344+
auto Arch = getSyclObjImpl(Device)->getDeviceArch();
13451345
switch (Arch) {
13461346
#define __SYCL_ARCHITECTURE(ARCH, VAL) \
13471347
case syclex::architecture::ARCH: \
@@ -1369,45 +1369,14 @@ RTDeviceBinaryImage *getBinImageFromMultiMap(
13691369

13701370
// Here, we aim to select all the device images from the
13711371
// [ItBegin, ItEnd) range that are AOT compiled for Device
1372-
// (checked using info::device::architecture) or JIT compiled.
1372+
// (checked using info::device::architecture) or JIT compiled.
13731373
// This selection will then be passed to urDeviceSelectBinary
13741374
// for final selection.
1375-
std::string_view ArchName = getArchName(Device);
13761375
std::vector<RTDeviceBinaryImage *> DeviceFilteredImgs;
13771376
DeviceFilteredImgs.reserve(std::distance(ItBegin, ItEnd));
13781377
for (auto It = ItBegin; It != ItEnd; ++It) {
1379-
auto PropRange = It->second->getDeviceRequirements();
1380-
auto PropIt =
1381-
std::find_if(PropRange.begin(), PropRange.end(), [&](const auto &Prop) {
1382-
return Prop->Name == std::string_view("compile_target");
1383-
});
1384-
auto AddImg = [&]() { DeviceFilteredImgs.push_back(It->second); };
1385-
1386-
// Device image has no compile_target property, so it is JIT compiled.
1387-
if (PropIt == PropRange.end()) {
1388-
AddImg();
1389-
continue;
1390-
}
1391-
1392-
// Device image has the compile_target property, so it is AOT compiled for
1393-
// some device, check if that architecture is Device's architecture.
1394-
auto CompileTargetByteArray = DeviceBinaryProperty(*PropIt).asByteArray();
1395-
CompileTargetByteArray.dropBytes(8);
1396-
std::string_view CompileTarget(
1397-
reinterpret_cast<const char *>(&CompileTargetByteArray[0]),
1398-
CompileTargetByteArray.size());
1399-
// Note: there are no explicit targets for CPUs, so on x86_64,
1400-
// intel_cpu_spr, and intel_cpu_gnr, we use a spir64_x86_64
1401-
// compile target image.
1402-
// TODO: When dedicated targets for CPU are added, (i.e.
1403-
// -fsycl-targets=intel_cpu_spr etc.) remove this special
1404-
// handling of CPU targets.
1405-
if ((ArchName == CompileTarget) ||
1406-
(CompileTarget == "spir64_x86_64" &&
1407-
(ArchName == "x86_64" || ArchName == "intel_cpu_spr" ||
1408-
ArchName == "intel_cpu_gnr"))) {
1409-
AddImg();
1410-
}
1378+
if (doesImageTargetMatchDevice(*It->second, Device))
1379+
DeviceFilteredImgs.push_back(It->second);
14111380
}
14121381

14131382
if (DeviceFilteredImgs.empty())
@@ -3405,6 +3374,67 @@ checkDevSupportDeviceRequirements(const device &Dev,
34053374
return {};
34063375
}
34073376

3377+
bool doesImageTargetMatchDevice(const RTDeviceBinaryImage &Img,
3378+
const device &Dev) {
3379+
auto PropRange = Img.getDeviceRequirements();
3380+
auto PropIt =
3381+
std::find_if(PropRange.begin(), PropRange.end(), [&](const auto &Prop) {
3382+
return Prop->Name == std::string_view("compile_target");
3383+
});
3384+
// Device image has no compile_target property, check target.
3385+
if (PropIt == PropRange.end()) {
3386+
sycl::backend BE = Dev.get_backend();
3387+
const char *Target = Img.getRawData().DeviceTargetSpec;
3388+
if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64) == 0) {
3389+
return (BE == sycl::backend::opencl ||
3390+
BE == sycl::backend::ext_oneapi_level_zero);
3391+
}
3392+
if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_X86_64) == 0) {
3393+
return Dev.is_cpu();
3394+
}
3395+
if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_GEN) == 0) {
3396+
return Dev.is_gpu() && (BE == sycl::backend::opencl ||
3397+
BE == sycl::backend::ext_oneapi_level_zero);
3398+
}
3399+
if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_FPGA) == 0) {
3400+
return Dev.is_accelerator();
3401+
}
3402+
if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_NVPTX64) == 0 ||
3403+
strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_LLVM_NVPTX64) == 0) {
3404+
return BE == sycl::backend::ext_oneapi_cuda;
3405+
}
3406+
if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_AMDGCN) == 0 ||
3407+
strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_LLVM_AMDGCN) == 0) {
3408+
return BE == sycl::backend::ext_oneapi_hip;
3409+
}
3410+
if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_NATIVE_CPU) == 0) {
3411+
return BE == sycl::backend::ext_oneapi_native_cpu;
3412+
}
3413+
assert(false && "Unexpected image target");
3414+
return false;
3415+
}
3416+
3417+
// Device image has the compile_target property, so it is AOT compiled for
3418+
// some device, check if that architecture is Device's architecture.
3419+
auto CompileTargetByteArray = DeviceBinaryProperty(*PropIt).asByteArray();
3420+
// Drop 8 bytes describing the size of the byte array.
3421+
CompileTargetByteArray.dropBytes(8);
3422+
std::string_view CompileTarget(
3423+
reinterpret_cast<const char *>(&CompileTargetByteArray[0]),
3424+
CompileTargetByteArray.size());
3425+
std::string_view ArchName = getArchName(Dev);
3426+
// Note: there are no explicit targets for CPUs, so on x86_64,
3427+
// intel_cpu_spr, and intel_cpu_gnr, we use a spir64_x86_64
3428+
// compile target image.
3429+
// TODO: When dedicated targets for CPU are added, (i.e.
3430+
// -fsycl-targets=intel_cpu_spr etc.) remove this special
3431+
// handling of CPU targets.
3432+
return ((ArchName == CompileTarget) ||
3433+
(CompileTarget == "spir64_x86_64" &&
3434+
(ArchName == "x86_64" || ArchName == "intel_cpu_spr" ||
3435+
ArchName == "intel_cpu_gnr")));
3436+
}
3437+
34083438
} // namespace detail
34093439
} // namespace _V1
34103440
} // namespace sycl

sycl/source/detail/program_manager/program_manager.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ checkDevSupportDeviceRequirements(const device &Dev,
5858
const RTDeviceBinaryImage &BinImages,
5959
const NDRDescT &NDRDesc = {});
6060

61+
bool doesImageTargetMatchDevice(const RTDeviceBinaryImage &Img,
62+
const device &Dev);
63+
6164
// This value must be the same as in libdevice/device_itt.h.
6265
// See sycl/doc/design/ITTAnnotations.md for more info.
6366
static constexpr uint32_t inline ITTSpecConstId = 0xFF747469;

sycl/source/kernel_bundle.cpp

Lines changed: 1 addition & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -307,35 +307,6 @@ std::vector<kernel_id> get_kernel_ids() {
307307
bool is_compatible(const std::vector<kernel_id> &KernelIDs, const device &Dev) {
308308
if (KernelIDs.empty())
309309
return true;
310-
// TODO: also need to check that the architecture specified by the
311-
// "-fsycl-targets" flag matches the device when we are able to get the
312-
// device's arch.
313-
auto doesImageTargetMatchDevice = [](const device &Dev,
314-
const detail::RTDeviceBinaryImage &Img) {
315-
const char *Target = Img.getRawData().DeviceTargetSpec;
316-
auto BE = Dev.get_backend();
317-
if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64) == 0) {
318-
return (BE == sycl::backend::opencl ||
319-
BE == sycl::backend::ext_oneapi_level_zero);
320-
} else if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_X86_64) ==
321-
0) {
322-
return Dev.is_cpu();
323-
} else if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_GEN) == 0) {
324-
return Dev.is_gpu() && (BE == sycl::backend::opencl ||
325-
BE == sycl::backend::ext_oneapi_level_zero);
326-
} else if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_FPGA) == 0) {
327-
return Dev.is_accelerator();
328-
} else if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_NVPTX64) == 0) {
329-
return BE == sycl::backend::ext_oneapi_cuda;
330-
} else if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_AMDGCN) == 0) {
331-
return BE == sycl::backend::ext_oneapi_hip;
332-
} else if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_NATIVE_CPU) == 0) {
333-
return BE == sycl::backend::ext_oneapi_native_cpu;
334-
}
335-
336-
return false;
337-
};
338-
339310
// One kernel may be contained in several binary images depending on the
340311
// number of targets. This kernel is compatible with the device if there is
341312
// at least one image (containing this kernel) whose aspects are supported by
@@ -347,7 +318,7 @@ bool is_compatible(const std::vector<kernel_id> &KernelIDs, const device &Dev) {
347318
if (std::none_of(BinImages.begin(), BinImages.end(),
348319
[&](const detail::RTDeviceBinaryImage *Img) {
349320
return doesDevSupportDeviceRequirements(Dev, *Img) &&
350-
doesImageTargetMatchDevice(Dev, *Img);
321+
doesImageTargetMatchDevice(*Img, Dev);
351322
}))
352323
return false;
353324
}

sycl/unittests/program_manager/CompileTarget.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,14 @@ class NDRangeKernel;
5959
class RangeKernel;
6060
class NoDeviceKernel;
6161
class JITFallbackKernel;
62+
class SKLOnlyKernel;
6263

6364
MOCK_INTEGRATION_HEADER(SingleTaskKernel)
6465
MOCK_INTEGRATION_HEADER(NDRangeKernel)
6566
MOCK_INTEGRATION_HEADER(RangeKernel)
6667
MOCK_INTEGRATION_HEADER(NoDeviceKernel)
6768
MOCK_INTEGRATION_HEADER(JITFallbackKernel)
69+
MOCK_INTEGRATION_HEADER(SKLOnlyKernel)
6870

6971
static sycl::unittest::MockDeviceImage Img[] = {
7072
sycl::unittest::generateDefaultImage({"SingleTaskKernel"}),
@@ -93,7 +95,8 @@ static sycl::unittest::MockDeviceImage Img[] = {
9395
sycl::unittest::generateDefaultImage({"JITFallbackKernel"}),
9496
sycl::unittest::generateImageWithCompileTarget("JITFallbackKernel",
9597
"intel_gpu_bdw"),
96-
};
98+
sycl::unittest::generateImageWithCompileTarget("SKLOnlyKernel",
99+
"intel_gpu_skl")};
97100

98101
static sycl::unittest::MockDeviceImageArray<std::size(Img)> ImgArray{Img};
99102

@@ -336,3 +339,10 @@ TEST_F(CompileTargetTest, JITFallbackKernel) {
336339
ASSERT_EQ(createWithILLog.size(), 1U);
337340
EXPECT_EQ(createWithILLog.back(), "JITFallbackKernel");
338341
}
342+
343+
TEST_F(CompileTargetTest, IsCompatible) {
344+
device Skl{archSelector(syclex::architecture::intel_gpu_skl)};
345+
EXPECT_TRUE(sycl::is_compatible<SKLOnlyKernel>(Skl));
346+
device Pvc{archSelector(syclex::architecture::intel_gpu_pvc)};
347+
EXPECT_FALSE(sycl::is_compatible<SKLOnlyKernel>(Pvc));
348+
}

0 commit comments

Comments
 (0)