@@ -1341,7 +1341,7 @@ void CheckJITCompilationForImage(const RTDeviceBinaryImage *const &Image,
13411341
13421342const char *getArchName (const device &Device) {
13431343 namespace syclex = sycl::ext::oneapi::experimental;
1344- auto Arch = Device. get_info <syclex::info::device::architecture> ();
1344+ auto Arch = getSyclObjImpl ( Device)-> getDeviceArch ();
13451345 switch (Arch) {
13461346#define __SYCL_ARCHITECTURE (ARCH, VAL ) \
13471347 case syclex::architecture::ARCH: \
@@ -1369,45 +1369,14 @@ RTDeviceBinaryImage *getBinImageFromMultiMap(
13691369
13701370 // Here, we aim to select all the device images from the
13711371 // [ItBegin, ItEnd) range that are AOT compiled for Device
1372- // (checked using info::device::architecture) or JIT compiled.
1372+ // (checked using info::device::architecture) or JIT compiled.
13731373 // This selection will then be passed to urDeviceSelectBinary
13741374 // for final selection.
1375- std::string_view ArchName = getArchName (Device);
13761375 std::vector<RTDeviceBinaryImage *> DeviceFilteredImgs;
13771376 DeviceFilteredImgs.reserve (std::distance (ItBegin, ItEnd));
13781377 for (auto It = ItBegin; It != ItEnd; ++It) {
1379- auto PropRange = It->second ->getDeviceRequirements ();
1380- auto PropIt =
1381- std::find_if (PropRange.begin (), PropRange.end (), [&](const auto &Prop) {
1382- return Prop->Name == std::string_view (" compile_target" );
1383- });
1384- auto AddImg = [&]() { DeviceFilteredImgs.push_back (It->second ); };
1385-
1386- // Device image has no compile_target property, so it is JIT compiled.
1387- if (PropIt == PropRange.end ()) {
1388- AddImg ();
1389- continue ;
1390- }
1391-
1392- // Device image has the compile_target property, so it is AOT compiled for
1393- // some device, check if that architecture is Device's architecture.
1394- auto CompileTargetByteArray = DeviceBinaryProperty (*PropIt).asByteArray ();
1395- CompileTargetByteArray.dropBytes (8 );
1396- std::string_view CompileTarget (
1397- reinterpret_cast <const char *>(&CompileTargetByteArray[0 ]),
1398- CompileTargetByteArray.size ());
1399- // Note: there are no explicit targets for CPUs, so on x86_64,
1400- // intel_cpu_spr, and intel_cpu_gnr, we use a spir64_x86_64
1401- // compile target image.
1402- // TODO: When dedicated targets for CPU are added, (i.e.
1403- // -fsycl-targets=intel_cpu_spr etc.) remove this special
1404- // handling of CPU targets.
1405- if ((ArchName == CompileTarget) ||
1406- (CompileTarget == " spir64_x86_64" &&
1407- (ArchName == " x86_64" || ArchName == " intel_cpu_spr" ||
1408- ArchName == " intel_cpu_gnr" ))) {
1409- AddImg ();
1410- }
1378+ if (doesImageTargetMatchDevice (*It->second , Device))
1379+ DeviceFilteredImgs.push_back (It->second );
14111380 }
14121381
14131382 if (DeviceFilteredImgs.empty ())
@@ -3405,6 +3374,67 @@ checkDevSupportDeviceRequirements(const device &Dev,
34053374 return {};
34063375}
34073376
3377+ bool doesImageTargetMatchDevice (const RTDeviceBinaryImage &Img,
3378+ const device &Dev) {
3379+ auto PropRange = Img.getDeviceRequirements ();
3380+ auto PropIt =
3381+ std::find_if (PropRange.begin (), PropRange.end (), [&](const auto &Prop) {
3382+ return Prop->Name == std::string_view (" compile_target" );
3383+ });
3384+ // Device image has no compile_target property, check target.
3385+ if (PropIt == PropRange.end ()) {
3386+ sycl::backend BE = Dev.get_backend ();
3387+ const char *Target = Img.getRawData ().DeviceTargetSpec ;
3388+ if (strcmp (Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64) == 0 ) {
3389+ return (BE == sycl::backend::opencl ||
3390+ BE == sycl::backend::ext_oneapi_level_zero);
3391+ }
3392+ if (strcmp (Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_X86_64) == 0 ) {
3393+ return Dev.is_cpu ();
3394+ }
3395+ if (strcmp (Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_GEN) == 0 ) {
3396+ return Dev.is_gpu () && (BE == sycl::backend::opencl ||
3397+ BE == sycl::backend::ext_oneapi_level_zero);
3398+ }
3399+ if (strcmp (Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_FPGA) == 0 ) {
3400+ return Dev.is_accelerator ();
3401+ }
3402+ if (strcmp (Target, __SYCL_DEVICE_BINARY_TARGET_NVPTX64) == 0 ||
3403+ strcmp (Target, __SYCL_DEVICE_BINARY_TARGET_LLVM_NVPTX64) == 0 ) {
3404+ return BE == sycl::backend::ext_oneapi_cuda;
3405+ }
3406+ if (strcmp (Target, __SYCL_DEVICE_BINARY_TARGET_AMDGCN) == 0 ||
3407+ strcmp (Target, __SYCL_DEVICE_BINARY_TARGET_LLVM_AMDGCN) == 0 ) {
3408+ return BE == sycl::backend::ext_oneapi_hip;
3409+ }
3410+ if (strcmp (Target, __SYCL_DEVICE_BINARY_TARGET_NATIVE_CPU) == 0 ) {
3411+ return BE == sycl::backend::ext_oneapi_native_cpu;
3412+ }
3413+ assert (false && " Unexpected image target" );
3414+ return false ;
3415+ }
3416+
3417+ // Device image has the compile_target property, so it is AOT compiled for
3418+ // some device, check if that architecture is Device's architecture.
3419+ auto CompileTargetByteArray = DeviceBinaryProperty (*PropIt).asByteArray ();
3420+ // Drop 8 bytes describing the size of the byte array.
3421+ CompileTargetByteArray.dropBytes (8 );
3422+ std::string_view CompileTarget (
3423+ reinterpret_cast <const char *>(&CompileTargetByteArray[0 ]),
3424+ CompileTargetByteArray.size ());
3425+ std::string_view ArchName = getArchName (Dev);
3426+ // Note: there are no explicit targets for CPUs, so on x86_64,
3427+ // intel_cpu_spr, and intel_cpu_gnr, we use a spir64_x86_64
3428+ // compile target image.
3429+ // TODO: When dedicated targets for CPU are added, (i.e.
3430+ // -fsycl-targets=intel_cpu_spr etc.) remove this special
3431+ // handling of CPU targets.
3432+ return ((ArchName == CompileTarget) ||
3433+ (CompileTarget == " spir64_x86_64" &&
3434+ (ArchName == " x86_64" || ArchName == " intel_cpu_spr" ||
3435+ ArchName == " intel_cpu_gnr" )));
3436+ }
3437+
34083438} // namespace detail
34093439} // namespace _V1
34103440} // namespace sycl
0 commit comments