Skip to content

Commit 1846390

Browse files
[SYCL][SYCLBIN] Select best images based on state (#20197)
Previously, the implementation would prioritize native device code binaries over IR modules indiscriminately. However, for input state and object state, native device code images are unsuitable. As such, this commit makes it so that the native device code images are only selected when the target state is executable. As an effect of this change, input and object tests will no longer run for CUDA and HIP. This is appropriate given these targets only currently support generating native device code images, which are meant to be as close to executable state as possible. Note that the driver doesn't currently make such limitations of the generated SYCLBIN files, so producing input and object state binaries with CUDA and HIP targets will result in kernel bundles with no binary images. Signed-off-by: Larsen, Steffen <[email protected]>
1 parent 2ffa3a0 commit 1846390

11 files changed

+111
-58
lines changed

sycl/source/detail/kernel_bundle_impl.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -580,7 +580,7 @@ class kernel_bundle_impl
580580
"kernel_bundle state does not match the state of the SYCLBIN file.");
581581

582582
std::vector<const detail::RTDeviceBinaryImage *> BestImages =
583-
SYCLBIN->getBestCompatibleImages(Devs);
583+
SYCLBIN->getBestCompatibleImages(Devs, State);
584584
MDeviceImages.reserve(BestImages.size());
585585
for (const detail::RTDeviceBinaryImage *Image : BestImages)
586586
MDeviceImages.emplace_back(device_image_impl::create(

sycl/source/detail/syclbin.cpp

Lines changed: 62 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -272,21 +272,36 @@ SYCLBINBinaries::SYCLBINBinaries(const char *SYCLBINContent, size_t SYCLBINSize)
272272
: SYCLBINContentCopy{ContentCopy(SYCLBINContent, SYCLBINSize)},
273273
SYCLBINContentCopySize{SYCLBINSize},
274274
ParsedSYCLBIN(SYCLBIN{SYCLBINContentCopy.get(), SYCLBINSize}) {
275-
size_t NumJITBinaries = 0, NumNativeBinaries = 0;
276-
for (const SYCLBIN::AbstractModule &AM : ParsedSYCLBIN.AbstractModules) {
277-
NumJITBinaries += AM.IRModules.size();
278-
NumNativeBinaries += AM.NativeDeviceCodeImages.size();
279-
}
280-
DeviceBinaries.reserve(NumJITBinaries + NumNativeBinaries);
281-
JITDeviceBinaryImages.reserve(NumJITBinaries);
282-
NativeDeviceBinaryImages.reserve(NumNativeBinaries);
275+
AbstractModuleDescriptors = std::unique_ptr<AbstractModuleDesc[]>(
276+
new AbstractModuleDesc[ParsedSYCLBIN.AbstractModules.size()]);
277+
278+
size_t NumBinaries = 0;
279+
for (const SYCLBIN::AbstractModule &AM : ParsedSYCLBIN.AbstractModules)
280+
NumBinaries += AM.IRModules.size() + AM.NativeDeviceCodeImages.size();
281+
DeviceBinaries.reserve(NumBinaries);
282+
BinaryImages = std::unique_ptr<RTDeviceBinaryImage[]>(
283+
new RTDeviceBinaryImage[NumBinaries]);
284+
285+
RTDeviceBinaryImage *CurrentBinaryImagesStart = BinaryImages.get();
286+
for (size_t I = 0; I < getNumAbstractModules(); ++I) {
287+
SYCLBIN::AbstractModule &AM = ParsedSYCLBIN.AbstractModules[I];
288+
AbstractModuleDesc &AMDesc = AbstractModuleDescriptors[I];
289+
290+
// Set up the abstract module descriptor.
291+
AMDesc.NumJITBinaries = AM.IRModules.size();
292+
AMDesc.NumNativeBinaries = AM.NativeDeviceCodeImages.size();
293+
AMDesc.JITBinaries = CurrentBinaryImagesStart;
294+
AMDesc.NativeBinaries = CurrentBinaryImagesStart + AMDesc.NumJITBinaries;
295+
CurrentBinaryImagesStart +=
296+
AMDesc.NumJITBinaries + AM.NativeDeviceCodeImages.size();
283297

284-
for (SYCLBIN::AbstractModule &AM : ParsedSYCLBIN.AbstractModules) {
285298
// Construct properties from SYCLBIN metadata.
286299
std::vector<_sycl_device_binary_property_set_struct> &BinPropertySets =
287300
convertAbstractModuleProperties(AM);
288301

289-
for (SYCLBIN::IRModule &IRM : AM.IRModules) {
302+
for (size_t J = 0; J < AM.IRModules.size(); ++J) {
303+
SYCLBIN::IRModule &IRM = AM.IRModules[J];
304+
290305
sycl_device_binary_struct &DeviceBinary = DeviceBinaries.emplace_back();
291306
DeviceBinary.Version = SYCL_DEVICE_BINARY_VERSION;
292307
DeviceBinary.Kind = 4;
@@ -309,11 +324,12 @@ SYCLBINBinaries::SYCLBINBinaries(const char *SYCLBINContent, size_t SYCLBINSize)
309324
DeviceBinary.PropertySetsEnd =
310325
BinPropertySets.data() + BinPropertySets.size();
311326
// Create an image from it.
312-
JITDeviceBinaryImages.emplace_back(&DeviceBinary);
327+
AMDesc.JITBinaries[J] = RTDeviceBinaryImage{&DeviceBinary};
313328
}
314329

315-
for (const SYCLBIN::NativeDeviceCodeImage &NDCI :
316-
AM.NativeDeviceCodeImages) {
330+
for (size_t J = 0; J < AM.NativeDeviceCodeImages.size(); ++J) {
331+
const SYCLBIN::NativeDeviceCodeImage &NDCI = AM.NativeDeviceCodeImages[J];
332+
317333
assert(NDCI.Metadata != nullptr);
318334
PropertySet &NDCIMetadataProps = (*NDCI.Metadata)
319335
[PropertySetRegistry::SYCLBIN_NATIVE_DEVICE_CODE_IMAGE_METADATA];
@@ -346,7 +362,7 @@ SYCLBINBinaries::SYCLBINBinaries(const char *SYCLBINContent, size_t SYCLBINSize)
346362
DeviceBinary.PropertySetsEnd =
347363
BinPropertySets.data() + BinPropertySets.size();
348364
// Create an image from it.
349-
NativeDeviceBinaryImages.emplace_back(&DeviceBinary);
365+
AMDesc.NativeBinaries[J] = RTDeviceBinaryImage{&DeviceBinary};
350366
}
351367
}
352368
}
@@ -394,33 +410,44 @@ SYCLBINBinaries::convertAbstractModuleProperties(SYCLBIN::AbstractModule &AM) {
394410
}
395411

396412
std::vector<const RTDeviceBinaryImage *>
397-
SYCLBINBinaries::getBestCompatibleImages(device_impl &Dev) {
398-
auto SelectCompatibleImages =
399-
[&](const std::vector<RTDeviceBinaryImage> &Imgs) {
400-
std::vector<const RTDeviceBinaryImage *> CompatImgs;
401-
for (const RTDeviceBinaryImage &Img : Imgs)
402-
if (doesDevSupportDeviceRequirements(Dev, Img) &&
403-
doesImageTargetMatchDevice(Img, Dev))
404-
CompatImgs.push_back(&Img);
405-
return CompatImgs;
406-
};
407-
408-
// Try with native images first.
409-
std::vector<const RTDeviceBinaryImage *> NativeImgs =
410-
SelectCompatibleImages(NativeDeviceBinaryImages);
411-
if (!NativeImgs.empty())
412-
return NativeImgs;
413-
414-
// If there were no native images, pick JIT images.
415-
return SelectCompatibleImages(JITDeviceBinaryImages);
413+
SYCLBINBinaries::getBestCompatibleImages(device_impl &Dev, bundle_state State) {
414+
auto GetCompatibleImage = [&](const RTDeviceBinaryImage *Imgs,
415+
size_t NumImgs) {
416+
const RTDeviceBinaryImage *CompatImagePtr =
417+
std::find_if(Imgs, Imgs + NumImgs, [&](const RTDeviceBinaryImage &Img) {
418+
return doesDevSupportDeviceRequirements(Dev, Img) &&
419+
doesImageTargetMatchDevice(Img, Dev);
420+
});
421+
return (CompatImagePtr != Imgs + NumImgs) ? CompatImagePtr : nullptr;
422+
};
423+
424+
std::vector<const RTDeviceBinaryImage *> Images;
425+
for (size_t I = 0; I < getNumAbstractModules(); ++I) {
426+
const AbstractModuleDesc &AMDesc = AbstractModuleDescriptors[I];
427+
// If the target state is executable, try with native images first.
428+
if (State == bundle_state::executable) {
429+
if (const RTDeviceBinaryImage *CompatImagePtr = GetCompatibleImage(
430+
AMDesc.NativeBinaries, AMDesc.NumNativeBinaries)) {
431+
Images.push_back(CompatImagePtr);
432+
continue;
433+
}
434+
}
435+
436+
// Otherwise, select the first compatible JIT binary.
437+
if (const RTDeviceBinaryImage *CompatImagePtr =
438+
GetCompatibleImage(AMDesc.JITBinaries, AMDesc.NumJITBinaries))
439+
Images.push_back(CompatImagePtr);
440+
}
441+
return Images;
416442
}
417443

418444
std::vector<const RTDeviceBinaryImage *>
419-
SYCLBINBinaries::getBestCompatibleImages(devices_range Devs) {
445+
SYCLBINBinaries::getBestCompatibleImages(devices_range Devs,
446+
bundle_state State) {
420447
std::set<const RTDeviceBinaryImage *> Images;
421448
for (device_impl &Dev : Devs) {
422449
std::vector<const RTDeviceBinaryImage *> BestImagesForDev =
423-
getBestCompatibleImages(Dev);
450+
getBestCompatibleImages(Dev, State);
424451
Images.insert(BestImagesForDev.cbegin(), BestImagesForDev.cend());
425452
}
426453
return {Images.cbegin(), Images.cend()};

sycl/source/detail/syclbin.hpp

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -124,9 +124,9 @@ struct SYCLBINBinaries {
124124
~SYCLBINBinaries() = default;
125125

126126
std::vector<const RTDeviceBinaryImage *>
127-
getBestCompatibleImages(device_impl &Dev);
127+
getBestCompatibleImages(device_impl &Dev, bundle_state State);
128128
std::vector<const RTDeviceBinaryImage *>
129-
getBestCompatibleImages(devices_range Dev);
129+
getBestCompatibleImages(devices_range Dev, bundle_state State);
130130

131131
uint8_t getState() const {
132132
PropertySet &GlobalMetadata =
@@ -143,6 +143,10 @@ struct SYCLBINBinaries {
143143
std::vector<_sycl_device_binary_property_set_struct> &
144144
convertAbstractModuleProperties(SYCLBIN::AbstractModule &AM);
145145

146+
size_t getNumAbstractModules() const {
147+
return ParsedSYCLBIN.AbstractModules.size();
148+
}
149+
146150
std::unique_ptr<char[]> SYCLBINContentCopy = nullptr;
147151
size_t SYCLBINContentCopySize = 0;
148152

@@ -156,8 +160,16 @@ struct SYCLBINBinaries {
156160
BinaryPropertySets;
157161

158162
std::vector<sycl_device_binary_struct> DeviceBinaries;
159-
std::vector<RTDeviceBinaryImage> JITDeviceBinaryImages;
160-
std::vector<RTDeviceBinaryImage> NativeDeviceBinaryImages;
163+
164+
struct AbstractModuleDesc {
165+
size_t NumJITBinaries;
166+
size_t NumNativeBinaries;
167+
RTDeviceBinaryImage *JITBinaries;
168+
RTDeviceBinaryImage *NativeBinaries;
169+
};
170+
171+
std::unique_ptr<AbstractModuleDesc[]> AbstractModuleDescriptors;
172+
std::unique_ptr<RTDeviceBinaryImage[]> BinaryImages;
161173
};
162174

163175
} // namespace detail

sycl/test-e2e/SYCLBIN/basic_input.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88

99
// REQUIRES: aspect-usm_device_allocations
1010

11+
// UNSUPPORTED: cuda, hip
12+
// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device
13+
// binaries and can therefore not produce input-state SYCLBIN files.
14+
1115
// -- Basic test for compiling and loading a SYCLBIN kernel_bundle in input
1216
// -- state.
1317

sycl/test-e2e/SYCLBIN/basic_object.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,13 @@
88

99
// REQUIRES: aspect-usm_device_allocations
1010

11+
// UNSUPPORTED: cuda, hip
12+
// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device
13+
// binaries and can therefore not produce object-state SYCLBIN files.
14+
1115
// -- Basic test for compiling and loading a SYCLBIN kernel_bundle in object
1216
// -- state.
1317

14-
// UNSUPPORTED: hip
15-
// UNSUPPORTED-INTENDED: HIP backend does not implement linking.
16-
1718
// RUN: %clangxx --offload-new-driver -fsyclbin=object %{sycl_target_opts} %S/Inputs/basic_kernel.cpp -o %t.syclbin
1819
// RUN: %{build} -o %t.out
1920
// RUN: %{run} %t.out %t.syclbin

sycl/test-e2e/SYCLBIN/dae_input.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88

99
// REQUIRES: aspect-usm_device_allocations
1010

11+
// UNSUPPORTED: cuda, hip
12+
// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device
13+
// binaries and can therefore not produce input-state SYCLBIN files.
14+
1115
// -- Test for using a kernel from a SYCLBIN with a dead argument.
1216

1317
// RUN: %clangxx --offload-new-driver -fsyclbin=input %{sycl_target_opts} %S/Inputs/dae_kernel.cpp -o %t.syclbin

sycl/test-e2e/SYCLBIN/dae_object.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,11 @@
88

99
// REQUIRES: aspect-usm_device_allocations
1010

11-
// -- Test for using a kernel from a SYCLBIN with a dead argument.
11+
// UNSUPPORTED: cuda, hip
12+
// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device
13+
// binaries and can therefore not produce object-state SYCLBIN files.
1214

13-
// UNSUPPORTED: hip
14-
// UNSUPPORTED-INTENDED: HIP backend does not implement linking.
15+
// -- Test for using a kernel from a SYCLBIN with a dead argument.
1516

1617
// RUN: %clangxx --offload-new-driver -fsyclbin=object %{sycl_target_opts} %S/Inputs/dae_kernel.cpp -o %t.syclbin
1718
// RUN: %{build} -o %t.out

sycl/test-e2e/SYCLBIN/dg_input.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,15 @@
88

99
// REQUIRES: aspect-usm_device_allocations
1010

11+
// UNSUPPORTED: cuda, hip
12+
// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device
13+
// binaries and can therefore not produce input-state SYCLBIN files.
14+
1115
// -- Test for using device globals in SYCLBIN.
1216

1317
// UNSUPPORTED: opencl && gpu
1418
// UNSUPPORTED-TRACKER: GSD-4287
1519

16-
// UNSUPPORTED: cuda
17-
// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/19533
18-
1920
// RUN: %clangxx --offload-new-driver -fsyclbin=input %{sycl_target_opts} %S/Inputs/dg_kernel.cpp -o %t.syclbin
2021
// RUN: %{build} -o %t.out
2122
// RUN: %{run} %t.out %t.syclbin

sycl/test-e2e/SYCLBIN/dg_object.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,15 @@
88

99
// REQUIRES: aspect-usm_device_allocations
1010

11+
// UNSUPPORTED: cuda, hip
12+
// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device
13+
// binaries and can therefore not produce object-state SYCLBIN files.
14+
1115
// -- Test for using device globals in SYCLBIN.
1216

1317
// UNSUPPORTED: opencl && gpu
1418
// UNSUPPORTED-TRACKER: GSD-4287
1519

16-
// UNSUPPORTED: hip
17-
// UNSUPPORTED-INTENDED: HIP backend does not implement linking.
18-
19-
// XFAIL: cuda
20-
// XFAIL-TRACKER: CMPLRLLVM-68859
21-
2220
// RUN: %clangxx --offload-new-driver -fsyclbin=object %{sycl_target_opts} %S/Inputs/dg_kernel.cpp -o %t.syclbin
2321
// RUN: %{build} -o %t.out
2422
// RUN: %{run} %t.out %t.syclbin

sycl/test-e2e/SYCLBIN/optional_kernel_features_input.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88

99
// REQUIRES: aspect-usm_device_allocations
1010

11+
// UNSUPPORTED: cuda, hip
12+
// UNSUPPORTED-INTENDED: CUDA and HIP targets produce only native device
13+
// binaries and can therefore not produce input-state SYCLBIN files.
14+
1115
// -- Test for compiling and loading a kernel bundle with a SYCLBIN containing
1216
// the use of optional kernel features.
1317

0 commit comments

Comments
 (0)