Skip to content

Commit 8030684

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web' (8 commits)
2 parents b499bf0 + c5d9aca commit 8030684

File tree

24 files changed

+472
-128
lines changed

24 files changed

+472
-128
lines changed

.github/CODEOWNERS

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -139,18 +139,18 @@ devops/dependencies-igc-dev.json @intel/sycl-matrix-reviewers @intel/dpcpp-esimd
139139
# Benchmarking scripts
140140
devops/scripts/benchmarks/ @intel/llvm-reviewers-benchmarking
141141

142-
# Kernel fusion JIT compiler
143-
sycl-jit/ @intel/dpcpp-kernel-fusion-reviewers
144-
sycl/doc/design/KernelFusionJIT.md @intel/dpcpp-kernel-fusion-reviewers
145-
sycl/doc/extensions/experimental/sycl_ext_codeplay_kernel_fusion.asciidoc @intel/dpcpp-kernel-fusion-reviewers
146-
sycl/include/sycl/ext/codeplay/experimental/fusion_properties.hpp @intel/dpcpp-kernel-fusion-reviewers
147-
sycl/include/sycl/ext/codeplay/experimental/fusion_wrapper.hpp @intel/dpcpp-kernel-fusion-reviewers
148-
sycl/source/detail/fusion/ @intel/dpcpp-kernel-fusion-reviewers
149-
sycl/source/detail/jit_compiler.hpp @intel/dpcpp-kernel-fusion-reviewers
150-
sycl/source/detail/jit_compiler.cpp @intel/dpcpp-kernel-fusion-reviewers
151-
sycl/source/detail/jit_device_binaries.hpp @intel/dpcpp-kernel-fusion-reviewers
152-
sycl/source/detail/jit_device_binaries.cpp @intel/dpcpp-kernel-fusion-reviewers
153-
sycl/test-e2e/KernelFusion @intel/dpcpp-kernel-fusion-reviewers
142+
# JIT compiler
143+
sycl-jit/ @intel/llvm-reviewers-runtime @cperkinsintel @aelovikov-intel
144+
sycl/doc/design/KernelFusionJIT.md @intel/llvm-reviewers-runtime @cperkinsintel @aelovikov-intel
145+
sycl/doc/extensions/experimental/sycl_ext_codeplay_kernel_fusion.asciidoc @intel/llvm-reviewers-runtime @cperkinsintel @aelovikov-intel
146+
sycl/include/sycl/ext/codeplay/experimental/fusion_properties.hpp @intel/llvm-reviewers-runtime @cperkinsintel @aelovikov-intel
147+
sycl/include/sycl/ext/codeplay/experimental/fusion_wrapper.hpp @intel/llvm-reviewers-runtime @cperkinsintel @aelovikov-intel
148+
sycl/source/detail/fusion/ @intel/llvm-reviewers-runtime @cperkinsintel @aelovikov-intel
149+
sycl/source/detail/jit_compiler.hpp @intel/llvm-reviewers-runtime @cperkinsintel @aelovikov-intel
150+
sycl/source/detail/jit_compiler.cpp @intel/llvm-reviewers-runtime @cperkinsintel @aelovikov-intel
151+
sycl/source/detail/jit_device_binaries.hpp @intel/llvm-reviewers-runtime @cperkinsintel @aelovikov-intel
152+
sycl/source/detail/jit_device_binaries.cpp @intel/llvm-reviewers-runtime @cperkinsintel @aelovikov-intel
153+
sycl/test-e2e/KernelFusion @intel/llvm-reviewers-runtime @cperkinsintel @aelovikov-intel
154154

155155
# Matrix
156156
sycl/include/sycl/ext/oneapi/matrix/ @intel/sycl-matrix-reviewers

devops/dependencies-igc-dev.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"linux": {
33
"igc_dev": {
4-
"github_tag": "igc-dev-4e160b3",
5-
"version": "4e160b3",
6-
"updated_at": "2025-09-11T02:09:40Z",
7-
"url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/3981554499/zip",
4+
"github_tag": "igc-dev-5a47189",
5+
"version": "5a47189",
6+
"updated_at": "2025-09-18T02:11:35Z",
7+
"url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/4041051864/zip",
88
"root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu"
99
}
1010
}

sycl-jit/jit-compiler/include/Resource.h

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,29 @@
88

99
#pragma once
1010

11-
#include <iterator>
12-
#include <string_view>
13-
#include <utility>
14-
15-
namespace jit_compiler {
11+
namespace jit_compiler::resource {
12+
// `resource.cpp` is compiled using freshly built clang and it's very hard to
13+
// sync compilation options between that and normal compilation for other files.
14+
// Note that some of the options might affect ABI (e.g., libstdc++ vs. libc++
15+
// usage, or custom sysroot/gcc installation directory). A much easier approach
16+
// is to ensure that `resource.cpp` doesn't have any includes at all, hence
17+
// these helpers:
18+
template <class T, unsigned long long N>
19+
constexpr unsigned long long size(const T (&)[N]) noexcept {
20+
return N;
21+
}
22+
struct resource_string_view {
23+
template <unsigned long long N>
24+
resource_string_view(const char (&S)[N]) : S(S), Size(N - 1) {}
25+
const char *S;
26+
unsigned long long Size;
27+
};
28+
struct resource_file {
29+
resource_string_view Path;
30+
resource_string_view Content;
31+
};
1632
// Defined in the auto-generated file:
17-
extern const std::pair<std::string_view, std::string_view> ToolchainFiles[];
18-
extern size_t NumToolchainFiles;
19-
extern std::string_view ToolchainPrefix;
20-
} // namespace jit_compiler
33+
extern const resource_file ToolchainFiles[];
34+
extern unsigned long long NumToolchainFiles;
35+
extern resource_string_view ToolchainPrefix;
36+
} // namespace jit_compiler::resource

sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,12 @@ class HashPreprocessedAction : public PreprocessorFrontendAction {
100100

101101
class SYCLToolchain {
102102
SYCLToolchain() {
103+
using namespace jit_compiler::resource;
104+
103105
for (size_t i = 0; i < NumToolchainFiles; ++i) {
104-
auto [Path, Content] = ToolchainFiles[i];
106+
resource_file RF = ToolchainFiles[i];
107+
std::string_view Path{RF.Path.S, RF.Path.Size};
108+
std::string_view Content{RF.Content.S, RF.Content.Size};
105109
ToolchainFS->addFile(Path, 0, llvm::MemoryBuffer::getMemBuffer(Content));
106110
}
107111
}
@@ -195,12 +199,14 @@ class SYCLToolchain {
195199
return std::move(Lib);
196200
}
197201

202+
std::string_view getPrefix() const { return Prefix; }
198203
std::string_view getClangXXExe() const { return ClangXXExe; }
199204

200205
private:
201206
clang::IgnoringDiagConsumer IgnoreDiag;
202-
std::string ClangXXExe =
203-
(jit_compiler::ToolchainPrefix + "/bin/clang++").str();
207+
std::string_view Prefix{jit_compiler::resource::ToolchainPrefix.S,
208+
jit_compiler::resource::ToolchainPrefix.Size};
209+
std::string ClangXXExe = (Prefix + "/bin/clang++").str();
204210
llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> ToolchainFS =
205211
llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
206212
};
@@ -499,7 +505,7 @@ Error jit_compiler::linkDeviceLibraries(llvm::Module &Module,
499505
LLVMContext &Context = Module.getContext();
500506
for (const std::string &LibName : LibNames) {
501507
std::string LibPath =
502-
(jit_compiler::ToolchainPrefix + "/lib/" + LibName).str();
508+
(SYCLToolchain::instance().getPrefix() + "/lib/" + LibName).str();
503509

504510
ModuleUPtr LibModule;
505511
if (auto Error = SYCLToolchain::instance()
@@ -518,7 +524,7 @@ Error jit_compiler::linkDeviceLibraries(llvm::Module &Module,
518524
// For GPU targets we need to link against vendor provided libdevice.
519525
if (IsCudaHIP) {
520526
Triple T{Module.getTargetTriple()};
521-
Driver D{(jit_compiler::ToolchainPrefix + "/bin/clang++").str(),
527+
Driver D{(SYCLToolchain::instance().getPrefix() + "/bin/clang++").str(),
522528
T.getTriple(), Diags};
523529
auto [CPU, Features] =
524530
Translator::getTargetCPUAndFeatureAttrs(&Module, "", Format);
@@ -764,7 +770,7 @@ jit_compiler::performPostLink(ModuleUPtr Module,
764770
auto &Ctx = Modules.front()->getContext();
765771
auto WrapLibraryInDevImg = [&](const std::string &LibName) -> Error {
766772
std::string LibPath =
767-
(jit_compiler::ToolchainPrefix + "/lib/" + LibName).str();
773+
(SYCLToolchain::instance().getPrefix() + "/lib/" + LibName).str();
768774
ModuleUPtr LibModule;
769775
if (auto Error = SYCLToolchain::instance()
770776
.loadBitcodeLibrary(LibPath, Ctx)

sycl-jit/jit-compiler/utils/generate.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ def main():
2828
"""
2929
#include <Resource.h>
3030
31-
namespace jit_compiler {
32-
const std::pair<std::string_view, std::string_view> ToolchainFiles[] = {"""
31+
namespace jit_compiler::resource {
32+
const resource_file ToolchainFiles[] = {"""
3333
)
3434

3535
def process_file(file_path):
@@ -41,7 +41,7 @@ def process_file(file_path):
4141
static const char data[] = {{
4242
#embed "{file_path}" if_empty(0)
4343
, 0}};
44-
return std::string_view(data, std::size(data) - 1);
44+
return resource_string_view{{data}};
4545
}}()
4646
}},"""
4747
)
@@ -66,9 +66,9 @@ def process_dir(dir):
6666
f"""
6767
}};
6868
69-
size_t NumToolchainFiles = std::size(ToolchainFiles);
70-
std::string_view ToolchainPrefix = "{args.prefix}";
71-
}} // namespace jit_compiler
69+
unsigned long long NumToolchainFiles = size(ToolchainFiles);
70+
resource_string_view ToolchainPrefix{{"{args.prefix}"}};
71+
}} // namespace jit_compiler::resource
7272
"""
7373
)
7474

sycl/include/sycl/ext/oneapi/bindless_images.hpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,15 +1036,19 @@ DataT fetch_image(const sampled_image_handle &imageHandle [[maybe_unused]],
10361036
"HintT must always be a recognized standard type");
10371037

10381038
#ifdef __SYCL_DEVICE_ONLY__
1039+
// Convert the raw handle to an image and use FETCH_UNSAMPLED_IMAGE since
1040+
// fetch_image should not use the sampler
10391041
if constexpr (detail::is_recognized_standard_type<DataT>()) {
1040-
return FETCH_SAMPLED_IMAGE(
1042+
return FETCH_UNSAMPLED_IMAGE(
10411043
DataT,
1042-
CONVERT_HANDLE_TO_SAMPLED_IMAGE(imageHandle.raw_handle, coordSize),
1044+
CONVERT_HANDLE_TO_IMAGE(imageHandle.raw_handle,
1045+
detail::OCLImageTyRead<coordSize>),
10431046
coords);
10441047
} else {
1045-
return sycl::bit_cast<DataT>(FETCH_SAMPLED_IMAGE(
1048+
return sycl::bit_cast<DataT>(FETCH_UNSAMPLED_IMAGE(
10461049
HintT,
1047-
CONVERT_HANDLE_TO_SAMPLED_IMAGE(imageHandle.raw_handle, coordSize),
1050+
CONVERT_HANDLE_TO_IMAGE(imageHandle.raw_handle,
1051+
detail::OCLImageTyRead<coordSize>),
10481052
coords));
10491053
}
10501054
#else

sycl/source/detail/device_impl.hpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1580,16 +1580,17 @@ class device_impl : public std::enable_shared_from_this<device_impl> {
15801580
.value_or(0);
15811581
}
15821582
CASE(ext_oneapi_clock_sub_group) {
1583-
// Will be updated in a follow-up UR patch.
1584-
return false;
1583+
return get_info_impl_nocheck<UR_DEVICE_INFO_CLOCK_SUB_GROUP_SUPPORT_EXP>()
1584+
.value_or(0);
15851585
}
15861586
CASE(ext_oneapi_clock_work_group) {
1587-
// Will be updated in a follow-up UR patch.
1588-
return false;
1587+
return get_info_impl_nocheck<
1588+
UR_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP>()
1589+
.value_or(0);
15891590
}
15901591
CASE(ext_oneapi_clock_device) {
1591-
// Will be updated in a follow-up UR patch.
1592-
return false;
1592+
return get_info_impl_nocheck<UR_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP>()
1593+
.value_or(0);
15931594
}
15941595
else {
15951596
return false; // This device aspect has not been implemented yet.

sycl/source/detail/ur_device_info_ret_types.inc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,4 +193,7 @@ MAP(UR_DEVICE_INFO_MIPMAP_SUPPORT_EXP, ur_bool_t)
193193
MAP(UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP, ur_bool_t)
194194
MAP(UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES, ur_kernel_launch_properties_flags_t)
195195
MAP(UR_DEVICE_INFO_MEMORY_EXPORT_EXPORTABLE_DEVICE_MEM_EXP, ur_bool_t)
196+
MAP(UR_DEVICE_INFO_CLOCK_SUB_GROUP_SUPPORT_EXP, ur_bool_t)
197+
MAP(UR_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP, ur_bool_t)
198+
MAP(UR_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP, ur_bool_t)
196199
// clang-format on

sycl/test-e2e/Basic/buffer/buffer_create.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@
88
// RUN: %{run} %t.out 2>&1 | FileCheck %s
99
// UNSUPPORTED: ze_debug
1010

11+
// L0v2 adapter doesn't optimize buffer creation based on device type yet
12+
// (integrated buffer implementation needs more work).
13+
// UNSUPPORTED: level_zero_v2_adapter
14+
// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/20121
15+
1116
#include <iostream>
1217
#include <level_zero/ze_api.h>
1318
#include <sycl/detail/core.hpp>

sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM_device.cpp

Lines changed: 37 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@
1111
#include <sycl/ext/oneapi/bindless_images.hpp>
1212
#include <sycl/usm.hpp>
1313

14-
class kernel_sampled_fetch;
14+
namespace {
1515

16-
int main() {
16+
template <typename T, sycl::image_channel_type ChanType>
17+
static int testSampledImageFetch() {
1718

1819
sycl::device dev;
1920
sycl::queue q(dev);
@@ -23,9 +24,9 @@ int main() {
2324
constexpr size_t width = 5;
2425
constexpr size_t height = 6;
2526
constexpr size_t N = width * height;
26-
std::vector<sycl::vec<uint16_t, 4>> out(N);
27-
std::vector<sycl::vec<uint16_t, 4>> expected(N);
28-
std::vector<sycl::vec<uint16_t, 4>> dataIn(N);
27+
std::vector<sycl::vec<T, 4>> out(N);
28+
std::vector<sycl::vec<T, 4>> expected(N);
29+
std::vector<sycl::vec<T, 4>> dataIn(N);
2930
for (int i = 0; i < width; i++) {
3031
for (int j = 0; j < height; j++) {
3132
auto index = i + (width * j);
@@ -43,8 +44,7 @@ int main() {
4344
sycl::filtering_mode::linear);
4445

4546
// Extension: image descriptor
46-
syclexp::image_descriptor desc({width, height}, 4,
47-
sycl::image_channel_type::unsigned_int16);
47+
syclexp::image_descriptor desc({width, height}, 4, ChanType);
4848
size_t pitch = 0;
4949

5050
// Extension: returns the device pointer to USM allocated pitched memory
@@ -65,21 +65,20 @@ int main() {
6565

6666
sycl::buffer buf(out.data(), sycl::range{height, width});
6767
q.submit([&](sycl::handler &cgh) {
68-
auto outAcc = buf.get_access<sycl::access_mode::write>(
68+
auto outAcc = buf.template get_access<sycl::access_mode::write>(
6969
cgh, sycl::range<2>{height, width});
7070

71-
cgh.parallel_for<kernel_sampled_fetch>(
72-
sycl::nd_range<2>{{width, height}, {width, height}},
73-
[=](sycl::nd_item<2> it) {
74-
size_t dim0 = it.get_local_id(0);
75-
size_t dim1 = it.get_local_id(1);
71+
cgh.parallel_for(sycl::nd_range<2>{{width, height}, {width, height}},
72+
[=](sycl::nd_item<2> it) {
73+
size_t dim0 = it.get_local_id(0);
74+
size_t dim1 = it.get_local_id(1);
7675

77-
// Extension: fetch data from sampled image handle
78-
auto px1 = syclexp::fetch_image<sycl::vec<uint16_t, 4>>(
79-
imgHandle, sycl::int2(dim0, dim1));
76+
// Extension: fetch data from sampled image handle
77+
auto px1 = syclexp::fetch_image<sycl::vec<T, 4>>(
78+
imgHandle, sycl::int2(dim0, dim1));
8079

81-
outAcc[sycl::id<2>{dim1, dim0}] = px1;
82-
});
80+
outAcc[sycl::id<2>{dim1, dim0}] = px1;
81+
});
8382
});
8483

8584
q.wait_and_throw();
@@ -121,3 +120,23 @@ int main() {
121120
std::cout << "Test failed!" << std::endl;
122121
return 3;
123122
}
123+
124+
} // namespace
125+
126+
int main() {
127+
if (int err =
128+
testSampledImageFetch<uint16_t,
129+
sycl::image_channel_type::unsigned_int16>()) {
130+
return err;
131+
}
132+
if (int err =
133+
testSampledImageFetch<uint32_t,
134+
sycl::image_channel_type::unsigned_int32>()) {
135+
return err;
136+
}
137+
if (int err =
138+
testSampledImageFetch<float, sycl::image_channel_type::fp32>()) {
139+
return err;
140+
}
141+
return 0;
142+
}

0 commit comments

Comments
 (0)