Skip to content

Commit d1d0081

Browse files
committed
Merge remote-tracking branch 'origin/sycl' into maronas/append-cache-controls
2 parents 6ea923b + 67b1719 commit d1d0081

File tree

27 files changed

+379
-277
lines changed

27 files changed

+379
-277
lines changed

.github/workflows/sycl-nightly.yml

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -201,11 +201,23 @@ jobs:
201201
always()
202202
&& !cancelled()
203203
&& needs.build-win.outputs.build_conclusion == 'success'
204+
strategy:
205+
fail-fast: false
206+
matrix:
207+
include:
208+
- name: Intel L0 Gen12 GPU
209+
runner: '["Windows", "gen12"]'
210+
target_devices: level_zero:gpu
211+
212+
- name: Intel L0 Battlemage GPU
213+
runner: '["Windows", "bmg"]'
214+
target_devices: level_zero:gpu
215+
204216
uses: ./.github/workflows/sycl-windows-run-tests.yml
205217
with:
206-
name: Intel GEN12 Graphics with Level Zero
207-
runner: '["Windows","gen12"]'
208-
target_devices: level_zero:gpu
218+
name: ${{ matrix.name }}
219+
runner: ${{ matrix.runner }}
220+
target_devices: ${{ matrix.target_devices }}
209221
toolchain_artifact_filename: ${{ needs.build-win.outputs.toolchain_artifact_filename }}
210222

211223
cuda-aws-start:

devops/scripts/benchmarks/html/scripts.js

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1606,6 +1606,12 @@ function fetchAndProcessData(url, isArchived = false) {
16061606
// Replace existing data for current data
16071607
loadedBenchmarkRuns = newRuns;
16081608
}
1609+
1610+
// The following variables have same values regardless of whether
1611+
// we load archived or current data
1612+
benchmarkMetadata = data.metadata || benchmarkMetadata || {};
1613+
benchmarkTags = data.tags || benchmarkTags || {};
1614+
16091615
initializeCharts();
16101616
})
16111617
.catch(error => {

sycl-jit/jit-compiler/CMakeLists.txt

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,14 @@ add_custom_command(
3636
if (WIN32)
3737
set(clang_exe ${CMAKE_BINARY_DIR}/bin/clang-cl.exe)
3838
set(SYCL_JIT_RESOURCE_CXX_FLAGS /O2 /std:c++17 /W0)
39-
if (CMAKE_BUILD_TYPE MATCHES "Debug")
40-
list(APPEND SYCL_JIT_RESOURCE_CXX_FLAGS /MDd)
41-
else()
42-
list(APPEND SYCL_JIT_RESOURCE_CXX_FLAGS /MD)
39+
40+
# Determine if we should use static (/MT) or dynamic (/MD) runtime
41+
set(USE_STATIC_RUNTIME 0)
42+
if(CMAKE_MSVC_RUNTIME_LIBRARY AND NOT CMAKE_MSVC_RUNTIME_LIBRARY MATCHES "DLL")
43+
set(USE_STATIC_RUNTIME 1)
4344
endif()
45+
46+
list(APPEND SYCL_JIT_RESOURCE_CXX_FLAGS /M$<IF:${USE_STATIC_RUNTIME},T,D>$<$<CONFIG:Debug>:d>)
4447
else()
4548
get_host_tool_path( clang CLANG clang_exe clang_target )
4649
set(SYCL_JIT_RESOURCE_CXX_FLAGS -O2 -Wno-c23-extensions -std=c++17 -fPIC -fvisibility=hidden)
@@ -56,7 +59,12 @@ endif()
5659
add_custom_command(
5760
OUTPUT ${SYCL_JIT_RESOURCE_OBJ}
5861
COMMAND
59-
${clang_exe} ${SYCL_JIT_RESOURCE_CPP} -I ${CMAKE_CURRENT_SOURCE_DIR}/include -c -o ${SYCL_JIT_RESOURCE_OBJ} ${SYCL_JIT_RESOURCE_CXX_FLAGS}
62+
# ${clang_exe}'s default target is not necessarily ${LLVM_HOST_TRIPLE}: when
63+
# cross compiling, it will be whatever the host tools were configured with,
64+
# and when building a cross compiler, it will be
65+
# ${LLVM_DEFAULT_TARGET_TRIPLE}. Rather than special casing these, just always
66+
# specify --target=${LLVM_HOST_TRIPLE}.
67+
${clang_exe} --target=${LLVM_HOST_TRIPLE} ${SYCL_JIT_RESOURCE_CPP} -I ${CMAKE_CURRENT_SOURCE_DIR}/include -c -o ${SYCL_JIT_RESOURCE_OBJ} ${SYCL_JIT_RESOURCE_CXX_FLAGS}
6068
DEPENDS
6169
${SYCL_JIT_RESOURCE_CPP}
6270
${CMAKE_CURRENT_SOURCE_DIR}/include/Resource.h

sycl/doc/extensions/experimental/sycl_ext_oneapi_enqueue_functions.asciidoc

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -386,11 +386,11 @@ a!
386386
----
387387
namespace sycl::ext::oneapi::experimental {
388388
389-
template <typename KernelName, int Dimensions, typename... Args>
389+
template <int Dimensions, typename... Args>
390390
void parallel_for(sycl::queue q, sycl::range<Dimensions> r,
391391
const sycl::kernel& k, Args&&... args);
392392
393-
template <typename KernelName, int Dimensions, typename... Args>
393+
template <int Dimensions, typename... Args>
394394
void parallel_for(sycl::handler &h, sycl::range<Dimensions> r,
395395
const sycl::kernel& k, Args&&... args);
396396
@@ -409,14 +409,12 @@ a!
409409
----
410410
namespace sycl::ext::oneapi::experimental {
411411
412-
template <typename KernelName, int Dimensions,
413-
typename Properties, typename... Args>
412+
template <int Dimensions, typename Properties, typename... Args>
414413
void parallel_for(sycl::queue q,
415414
launch_config<sycl::range<Dimensions>, Properties> c,
416415
const sycl::kernel& k, Args&& args...);
417416
418-
template <typename KernelName, int Dimensions,
419-
typename Properties, typename... Args>
417+
template <int Dimensions, typename Properties, typename... Args>
420418
void parallel_for(sycl::handler &h,
421419
launch_config<sycl::range<Dimensions>, Properties> c,
422420
const sycl::kernel& k, Args&& args...);
@@ -503,11 +501,11 @@ a!
503501
----
504502
namespace sycl::ext::oneapi::experimental {
505503
506-
template <typename KernelName, int Dimensions, typename... Args>
504+
template <int Dimensions, typename... Args>
507505
void nd_launch(sycl::queue q, sycl::nd_range<Dimensions> r,
508506
const sycl::kernel& k, Args&&... args);
509507
510-
template <typename KernelName, int Dimensions, typename... Args>
508+
template <int Dimensions, typename... Args>
511509
void nd_launch(sycl::handler &h, sycl::nd_range<Dimensions> r,
512510
const sycl::kernel& k, Args&&... args);
513511
@@ -527,14 +525,12 @@ a!
527525
----
528526
namespace sycl::ext::oneapi::experimental {
529527
530-
template <typename KernelName, int Dimensions,
531-
typename Properties, typename... Args>
528+
template <int Dimensions, typename Properties, typename... Args>
532529
void nd_launch(sycl::queue q,
533530
launch_config<sycl::nd_range<Dimensions>, Properties> c,
534531
const sycl::kernel& k, Args&& args...);
535532
536-
template <typename KernelName, int Dimensions,
537-
typename Properties, typename... Args>
533+
template <int Dimensions, typename Properties, typename... Args>
538534
void nd_launch(sycl::handler &h,
539535
launch_config<sycl::nd_range<Dimensions>, Properties> c,
540536
const sycl::kernel& k, Args&& args...);

sycl/source/detail/cg.hpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
#include <sycl/kernel.hpp> // for kernel_impl
2121
#include <sycl/kernel_bundle.hpp> // for kernel_bundle_impl
2222

23+
#include <detail/device_kernel_info.hpp>
24+
2325
#include <assert.h> // for assert
2426
#include <memory> // for shared_ptr, unique_ptr
2527
#include <stddef.h> // for size_t
@@ -253,7 +255,6 @@ class CGExecKernel : public CG {
253255
std::shared_ptr<detail::kernel_impl> MSyclKernel;
254256
std::shared_ptr<detail::kernel_bundle_impl> MKernelBundle;
255257
std::vector<ArgDesc> MArgs;
256-
KernelNameStrT MKernelName;
257258
DeviceKernelInfo &MDeviceKernelInfo;
258259
std::vector<std::shared_ptr<detail::stream_impl>> MStreams;
259260
std::vector<std::shared_ptr<const void>> MAuxiliaryResources;
@@ -269,7 +270,7 @@ class CGExecKernel : public CG {
269270
std::shared_ptr<detail::kernel_impl> SyclKernel,
270271
std::shared_ptr<detail::kernel_bundle_impl> KernelBundle,
271272
CG::StorageInitHelper CGData, std::vector<ArgDesc> Args,
272-
KernelNameStrT KernelName, DeviceKernelInfo &DeviceKernelInfo,
273+
DeviceKernelInfo &DeviceKernelInfo,
273274
std::vector<std::shared_ptr<detail::stream_impl>> Streams,
274275
std::vector<std::shared_ptr<const void>> AuxiliaryResources,
275276
CGType Type, ur_kernel_cache_config_t KernelCacheConfig,
@@ -278,8 +279,7 @@ class CGExecKernel : public CG {
278279
: CG(Type, std::move(CGData), std::move(loc)), MNDRDesc(NDRDesc),
279280
MHostKernel(std::move(HKernel)), MSyclKernel(std::move(SyclKernel)),
280281
MKernelBundle(std::move(KernelBundle)), MArgs(std::move(Args)),
281-
MKernelName(std::move(KernelName)), MDeviceKernelInfo(DeviceKernelInfo),
282-
MStreams(std::move(Streams)),
282+
MDeviceKernelInfo(DeviceKernelInfo), MStreams(std::move(Streams)),
283283
MAuxiliaryResources(std::move(AuxiliaryResources)),
284284
MAlternativeKernels{}, MKernelCacheConfig(std::move(KernelCacheConfig)),
285285
MKernelIsCooperative(KernelIsCooperative),
@@ -291,7 +291,9 @@ class CGExecKernel : public CG {
291291
CGExecKernel(const CGExecKernel &CGExec) = default;
292292

293293
const std::vector<ArgDesc> &getArguments() const { return MArgs; }
294-
KernelNameStrRefT getKernelName() const { return MKernelName; }
294+
std::string_view getKernelName() const {
295+
return static_cast<std::string_view>(MDeviceKernelInfo.Name);
296+
}
295297
const std::vector<std::shared_ptr<detail::stream_impl>> &getStreams() const {
296298
return MStreams;
297299
}

sycl/source/detail/graph/graph_impl.cpp

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -739,9 +739,8 @@ ur_exp_command_buffer_sync_point_t exec_graph_impl::enqueueNodeDirect(
739739
CGExec->MLine, CGExec->MColumn);
740740
std::tie(CmdTraceEvent, InstanceID) = emitKernelInstrumentationData(
741741
sycl::detail::GSYCLStreamID, CGExec->MSyclKernel, CodeLoc,
742-
CGExec->MIsTopCodeLoc, CGExec->MKernelName.data(),
743-
CGExec->MDeviceKernelInfo, nullptr, CGExec->MNDRDesc,
744-
CGExec->MKernelBundle.get(), CGExec->MArgs);
742+
CGExec->MIsTopCodeLoc, CGExec->MDeviceKernelInfo, nullptr,
743+
CGExec->MNDRDesc, CGExec->MKernelBundle.get(), CGExec->MArgs);
745744
if (CmdTraceEvent)
746745
sycl::detail::emitInstrumentationGeneral(sycl::detail::GSYCLStreamID,
747746
InstanceID, CmdTraceEvent,
@@ -1401,14 +1400,14 @@ void exec_graph_impl::update(std::shared_ptr<graph_impl> GraphImpl) {
14011400
sycl::detail::CGExecKernel *TargetCGExec =
14021401
static_cast<sycl::detail::CGExecKernel *>(
14031402
MNodeStorage[i]->MCommandGroup.get());
1404-
KernelNameStrRefT TargetKernelName = TargetCGExec->getKernelName();
1403+
std::string_view TargetKernelName = TargetCGExec->getKernelName();
14051404

14061405
sycl::detail::CGExecKernel *SourceCGExec =
14071406
static_cast<sycl::detail::CGExecKernel *>(
14081407
GraphImpl->MNodeStorage[i]->MCommandGroup.get());
1409-
KernelNameStrRefT SourceKernelName = SourceCGExec->getKernelName();
1408+
std::string_view SourceKernelName = SourceCGExec->getKernelName();
14101409

1411-
if (TargetKernelName.compare(SourceKernelName) != 0) {
1410+
if (TargetKernelName != SourceKernelName) {
14121411
std::stringstream ErrorStream(
14131412
"Cannot update using a graph with mismatched kernel "
14141413
"types. Source node type ");
@@ -1568,14 +1567,14 @@ void exec_graph_impl::populateURKernelUpdateStructs(
15681567
UrKernel = Kernel->getHandleRef();
15691568
EliminatedArgMask = Kernel->getKernelArgMask();
15701569
} else if (auto SyclKernelImpl =
1571-
KernelBundleImplPtr
1572-
? KernelBundleImplPtr->tryGetKernel(ExecCG.MKernelName)
1573-
: std::shared_ptr<kernel_impl>{nullptr}) {
1570+
KernelBundleImplPtr ? KernelBundleImplPtr->tryGetKernel(
1571+
ExecCG.MDeviceKernelInfo.Name)
1572+
: std::shared_ptr<kernel_impl>{nullptr}) {
15741573
UrKernel = SyclKernelImpl->getHandleRef();
15751574
EliminatedArgMask = SyclKernelImpl->getKernelArgMask();
15761575
} else {
15771576
BundleObjs = sycl::detail::ProgramManager::getInstance().getOrCreateKernel(
1578-
ContextImpl, DeviceImpl, ExecCG.MKernelName, ExecCG.MDeviceKernelInfo);
1577+
ContextImpl, DeviceImpl, ExecCG.MDeviceKernelInfo);
15791578
UrKernel = BundleObjs->MKernelHandle;
15801579
EliminatedArgMask = BundleObjs->MKernelArgMask;
15811580
}

sycl/source/detail/graph/node_impl.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,7 @@ class node_impl : public std::enable_shared_from_this<node_impl> {
341341
static_cast<sycl::detail::CGExecKernel *>(MCommandGroup.get());
342342
sycl::detail::CGExecKernel *ExecKernelB =
343343
static_cast<sycl::detail::CGExecKernel *>(Node.MCommandGroup.get());
344-
return ExecKernelA->MKernelName.compare(ExecKernelB->MKernelName) == 0;
344+
return ExecKernelA->getKernelName() == ExecKernelB->getKernelName();
345345
}
346346
case sycl::detail::CGType::CopyUSM: {
347347
sycl::detail::CGCopyUSM *CopyA =
@@ -543,7 +543,7 @@ class node_impl : public std::enable_shared_from_this<node_impl> {
543543
Stream << "CGExecKernel \\n";
544544
sycl::detail::CGExecKernel *Kernel =
545545
static_cast<sycl::detail::CGExecKernel *>(MCommandGroup.get());
546-
Stream << "NAME = " << Kernel->MKernelName << "\\n";
546+
Stream << "NAME = " << Kernel->getKernelName() << "\\n";
547547
if (Verbose) {
548548
Stream << "ARGS = \\n";
549549
for (size_t i = 0; i < Kernel->MArgs.size(); i++) {

sycl/source/detail/handler_impl.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,11 @@ class handler_impl {
6060
HandlerSubmissionState::EXPLICIT_KERNEL_BUNDLE_STATE;
6161
}
6262

63+
KernelNameStrRefT getKernelName() const {
64+
assert(MDeviceKernelInfoPtr);
65+
return static_cast<KernelNameStrRefT>(MDeviceKernelInfoPtr->Name);
66+
}
67+
6368
/// Registers mutually exclusive submission states.
6469
HandlerSubmissionState MSubmissionState = HandlerSubmissionState::NO_STATE;
6570

sycl/source/detail/program_manager/program_manager.cpp

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1084,25 +1084,32 @@ ProgramManager::getBuiltURProgram(const BinImgWithDeps &ImgWithDeps,
10841084

10851085
FastKernelCacheValPtr ProgramManager::getOrCreateKernel(
10861086
context_impl &ContextImpl, device_impl &DeviceImpl,
1087-
KernelNameStrRefT KernelName, DeviceKernelInfo &DeviceKernelInfo,
1088-
const NDRDescT &NDRDesc) {
1087+
DeviceKernelInfo &DeviceKernelInfo, const NDRDescT &NDRDesc) {
10891088
if constexpr (DbgProgMgr > 0) {
10901089
std::cerr << ">>> ProgramManager::getOrCreateKernel(" << &ContextImpl
1091-
<< ", " << &DeviceImpl << ", " << KernelName << ")\n";
1090+
<< ", " << &DeviceImpl << ", "
1091+
<< static_cast<std::string_view>(DeviceKernelInfo.Name) << ")\n";
10921092
}
10931093

10941094
KernelProgramCache &Cache = ContextImpl.getKernelProgramCache();
10951095
ur_device_handle_t UrDevice = DeviceImpl.getHandleRef();
10961096
if (SYCLConfig<SYCL_CACHE_IN_MEM>::get()) {
1097-
if (auto KernelCacheValPtr = Cache.tryToGetKernelFast(
1098-
KernelName, UrDevice, DeviceKernelInfo.getKernelSubcache())) {
1097+
if (auto KernelCacheValPtr =
1098+
Cache.tryToGetKernelFast(DeviceKernelInfo.Name, UrDevice,
1099+
DeviceKernelInfo.getKernelSubcache())) {
10991100
return KernelCacheValPtr;
11001101
}
11011102
}
11021103

1103-
Managed<ur_program_handle_t> Program =
1104-
getBuiltURProgram(ContextImpl, DeviceImpl, KernelName, NDRDesc);
1104+
Managed<ur_program_handle_t> Program = getBuiltURProgram(
1105+
ContextImpl, DeviceImpl, DeviceKernelInfo.Name, NDRDesc);
11051106

1107+
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
1108+
// Simplify this once `DeviceKernelInfo.Name`'s type is known.
1109+
// Using `decltype(auto)` instead of just `auto` to get reference when
1110+
// possible.
1111+
#endif
1112+
decltype(auto) KernelName = KernelNameStrRefT{DeviceKernelInfo.Name};
11061113
auto BuildF = [this, &Program, &KernelName, &ContextImpl] {
11071114
adapter_impl &Adapter = ContextImpl.getAdapter();
11081115
Managed<ur_kernel_handle_t> Kernel{Adapter};
@@ -1125,7 +1132,8 @@ FastKernelCacheValPtr ProgramManager::getOrCreateKernel(
11251132
return std::make_pair(std::move(Kernel), ArgMask);
11261133
};
11271134

1128-
auto GetCachedBuildF = [&Cache, &KernelName, &Program]() {
1135+
auto GetCachedBuildF = [&Cache, &KernelName = DeviceKernelInfo.Name,
1136+
&Program]() {
11291137
return Cache.getOrInsertKernel(Program, KernelName);
11301138
};
11311139

@@ -1147,7 +1155,7 @@ FastKernelCacheValPtr ProgramManager::getOrCreateKernel(
11471155
auto ret_val = std::make_shared<FastKernelCacheVal>(
11481156
KernelArgMaskPair.first.retain(), &(BuildResult->MBuildResultMutex),
11491157
KernelArgMaskPair.second, std::move(Program), ContextImpl.getAdapter());
1150-
Cache.saveKernel(KernelName, UrDevice, ret_val,
1158+
Cache.saveKernel(DeviceKernelInfo.Name, UrDevice, ret_val,
11511159
DeviceKernelInfo.getKernelSubcache());
11521160
return ret_val;
11531161
}
@@ -1823,10 +1831,11 @@ ProgramManager::kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const {
18231831
DeviceKernelInfo &ProgramManager::getOrCreateDeviceKernelInfo(
18241832
const CompileTimeKernelInfoTy &Info) {
18251833
std::lock_guard<std::mutex> Guard(m_DeviceKernelInfoMapMutex);
1826-
auto Result =
1834+
auto [Iter, Inserted] =
18271835
m_DeviceKernelInfoMap.try_emplace(KernelNameStrT{Info.Name.data()}, Info);
1828-
Result.first->second.setCompileTimeInfoIfNeeded(Info);
1829-
return Result.first->second;
1836+
if (!Inserted)
1837+
Iter->second.setCompileTimeInfoIfNeeded(Info);
1838+
return Iter->second;
18301839
}
18311840

18321841
DeviceKernelInfo &

sycl/source/detail/program_manager/program_manager.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,6 @@ class ProgramManager {
200200

201201
FastKernelCacheValPtr getOrCreateKernel(context_impl &ContextImpl,
202202
device_impl &DeviceImpl,
203-
KernelNameStrRefT KernelName,
204203
DeviceKernelInfo &DeviceKernelInfo,
205204
const NDRDescT &NDRDesc = {});
206205

0 commit comments

Comments
 (0)