Skip to content

Commit 39e9bb5

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web' (7 commits)
2 parents 57111d4 + 753ab35 commit 39e9bb5

File tree

20 files changed

+164
-122
lines changed

20 files changed

+164
-122
lines changed

devops/dependencies-igc-dev.json

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,10 @@
11
{
22
"linux": {
33
"igc_dev": {
4-
"github_tag": "igc-dev-e146785",
5-
"version": "e146785",
6-
"updated_at": "2025-10-02T03:05:40Z",
7-
"url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/4161218080/zip",
4+
"github_tag": "igc-dev-e4b64c1",
5+
"version": "e4b64c1",
6+
"updated_at": "2025-10-05T10:41:23Z",
7+
"url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/4185473239/zip",
88
"root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu"
99
}
1010
}

devops/scripts/benchmarks/benches/compute.py

Lines changed: 48 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -3,19 +3,19 @@
33
# See LICENSE.TXT
44
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
55

6-
from itertools import product
6+
import copy
77
import csv
88
import io
9-
import copy
109
import math
1110
from enum import Enum
11+
from itertools import product
1212
from pathlib import Path
1313

14-
from .base import Benchmark, Suite, TracingType
15-
from utils.result import BenchmarkMetadata, Result
16-
from .base import Benchmark, Suite
17-
from options import options
1814
from git_project import GitProject
15+
from options import options
16+
from utils.result import BenchmarkMetadata, Result
17+
18+
from .base import Benchmark, Suite, TracingType
1919

2020

2121
class RUNTIMES(Enum):
@@ -100,66 +100,57 @@ def setup(self) -> None:
100100

101101
def additional_metadata(self) -> dict[str, BenchmarkMetadata]:
102102
metadata = {
103-
"SubmitKernel": BenchmarkMetadata(
104-
type="group",
105-
description="Measures CPU time overhead of submitting kernels through different APIs.",
106-
notes="Each layer builds on top of the previous layer, adding functionality and overhead.\n"
107-
"The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n"
108-
"The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n"
109-
"Work is ongoing to reduce the overhead of the SYCL API\n",
110-
tags=["submit", "micro", "SYCL", "UR", "L0"],
111-
range_min=0.0,
112-
),
113103
"SinKernelGraph": BenchmarkMetadata(
114104
type="group",
115105
unstable="This benchmark combines both eager and graph execution, and may not be representative of real use cases.",
116106
tags=["submit", "memory", "proxy", "SYCL", "UR", "L0", "graph"],
117107
),
118-
"SubmitGraph": BenchmarkMetadata(
119-
type="group", tags=["submit", "micro", "SYCL", "UR", "L0", "graph"]
120-
),
121108
"FinalizeGraph": BenchmarkMetadata(
122109
type="group", tags=["finalize", "micro", "SYCL", "graph"]
123110
),
124111
}
125112

126113
# Add metadata for all SubmitKernel group variants
127-
base_metadata = metadata["SubmitKernel"]
128-
114+
submit_kernel_metadata = BenchmarkMetadata(
115+
type="group",
116+
notes="Each layer builds on top of the previous layer, adding functionality and overhead.\n"
117+
"The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n"
118+
"The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n"
119+
"Work is ongoing to reduce the overhead of the SYCL API\n",
120+
tags=["submit", "micro", "SYCL", "UR", "L0"],
121+
range_min=0.0,
122+
)
129123
for order in ["in order", "out of order"]:
130124
for completion in ["", " with completion"]:
131125
for events in ["", " using events"]:
132126
group_name = f"SubmitKernel {order}{completion}{events} long kernel"
133-
metadata[group_name] = BenchmarkMetadata(
134-
type="group",
135-
description=f"Measures CPU time overhead of submitting {order} kernels with longer execution times through different APIs.",
136-
notes=base_metadata.notes,
137-
tags=base_metadata.tags,
138-
range_min=base_metadata.range_min,
127+
metadata[group_name] = copy.deepcopy(submit_kernel_metadata)
128+
metadata[group_name].description = (
129+
f"Measures CPU time overhead of submitting {order} kernels with longer execution times through different APIs."
139130
)
140-
141131
# CPU count variants
142132
cpu_count_group = f"{group_name}, CPU count"
143-
metadata[cpu_count_group] = BenchmarkMetadata(
144-
type="group",
145-
description=f"Measures CPU time overhead of submitting {order} kernels with longer execution times through different APIs.",
146-
notes=base_metadata.notes,
147-
tags=base_metadata.tags,
148-
range_min=base_metadata.range_min,
133+
metadata[cpu_count_group] = copy.deepcopy(submit_kernel_metadata)
134+
metadata[cpu_count_group].description = (
135+
f"Measures CPU instruction count overhead of submitting {order} kernels with longer execution times through different APIs."
149136
)
150137

151138
# Add metadata for all SubmitGraph group variants
152-
base_metadata = metadata["SubmitGraph"]
139+
submit_graph_metadata = BenchmarkMetadata(
140+
type="group", tags=["submit", "micro", "SYCL", "UR", "L0", "graph"]
141+
)
153142
for order in ["in order", "out of order"]:
154143
for completion in ["", " with completion"]:
155144
for events in ["", " using events"]:
156145
for num_kernels in self.submit_graph_num_kernels:
157-
group_name = f"SubmitGraph {order}{completion}{events}, {num_kernels} kernels"
158-
metadata[group_name] = BenchmarkMetadata(
159-
type="group",
160-
tags=base_metadata.tags,
161-
)
162-
146+
for host_tasks in ["", " use host tasks"]:
147+
group_name = f"SubmitGraph {order}{completion}{events}{host_tasks}, {num_kernels} kernels"
148+
metadata[group_name] = copy.deepcopy(submit_graph_metadata)
149+
# CPU count variants
150+
cpu_count_group = f"{group_name}, CPU count"
151+
metadata[cpu_count_group] = copy.deepcopy(
152+
submit_graph_metadata
153+
)
163154
return metadata
164155

165156
def benchmarks(self) -> list[Benchmark]:
@@ -1088,6 +1079,22 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
10881079
bin_args.append(f"--profilerType={self.profiler_type.value}")
10891080
return bin_args
10901081

1082+
def get_metadata(self) -> dict[str, BenchmarkMetadata]:
1083+
metadata_dict = super().get_metadata()
1084+
1085+
# Create CPU count variant with modified display name and explicit_group
1086+
cpu_count_name = self.name() + " CPU count"
1087+
cpu_count_metadata = copy.deepcopy(metadata_dict[self.name()])
1088+
cpu_count_display_name = self.display_name() + ", CPU count"
1089+
cpu_count_explicit_group = (
1090+
self.explicit_group() + ", CPU count" if self.explicit_group() else ""
1091+
)
1092+
cpu_count_metadata.display_name = cpu_count_display_name
1093+
cpu_count_metadata.explicit_group = cpu_count_explicit_group
1094+
metadata_dict[cpu_count_name] = cpu_count_metadata
1095+
1096+
return metadata_dict
1097+
10911098

10921099
class UllsEmptyKernel(ComputeBenchmark):
10931100
def __init__(

devops/scripts/benchmarks/html/scripts.js

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -345,7 +345,7 @@ function createChart(data, containerId, type) {
345345
if (elements.length > 0) {
346346
const point = elements[0].element.$context.raw;
347347
if (point.gitHash && point.gitRepo) {
348-
window.open(`https://github.com/${point.gitRepo}/commit/${point.gitHash}`, '_blank');
348+
window.open(`${point.gitRepo}/commit/${point.gitHash}`, '_blank');
349349
}
350350
}
351351
};

sycl/source/detail/program_manager/program_manager.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -164,7 +164,7 @@ static bool isDeviceBinaryTypeSupported(context_impl &ContextImpl,
164164
[[maybe_unused]] auto VecToString = [](auto &Vec) -> std::string {
165165
std::ostringstream Out;
166166
Out << "{";
167-
for (auto Elem : Vec)
167+
for (const auto &Elem : Vec)
168168
Out << Elem << " ";
169169
Out << "}";
170170
return Out.str();

sycl/source/detail/queue_impl.cpp

Lines changed: 60 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -420,6 +420,61 @@ queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
420420
return EventImpl;
421421
}
422422

423+
EventImplPtr queue_impl::submit_command_to_graph(
424+
ext::oneapi::experimental::detail::graph_impl &GraphImpl,
425+
std::unique_ptr<detail::CG> CommandGroup, sycl::detail::CGType CGType,
426+
sycl::ext::oneapi::experimental::node_type UserFacingNodeType) {
427+
auto EventImpl = detail::event_impl::create_completed_host_event();
428+
EventImpl->setSubmittedQueue(weak_from_this());
429+
ext::oneapi::experimental::detail::node_impl *NodeImpl = nullptr;
430+
431+
// GraphImpl is read and written in this scope so we lock this graph
432+
// with full privileges.
433+
ext::oneapi::experimental::detail::graph_impl::WriteLock Lock(
434+
GraphImpl.MMutex);
435+
436+
ext::oneapi::experimental::node_type NodeType =
437+
UserFacingNodeType != ext::oneapi::experimental::node_type::empty
438+
? UserFacingNodeType
439+
: ext::oneapi::experimental::detail::getNodeTypeFromCG(CGType);
440+
441+
// Create a new node in the graph representing this command-group
442+
if (isInOrder()) {
443+
// In-order queues create implicit linear dependencies between nodes.
444+
// Find the last node added to the graph from this queue, so our new
445+
// node can set it as a predecessor.
446+
std::vector<ext::oneapi::experimental::detail::node_impl *> Deps;
447+
if (ext::oneapi::experimental::detail::node_impl *DependentNode =
448+
GraphImpl.getLastInorderNode(this)) {
449+
Deps.push_back(DependentNode);
450+
}
451+
NodeImpl = &GraphImpl.add(NodeType, std::move(CommandGroup), Deps);
452+
453+
// If we are recording an in-order queue remember the new node, so it
454+
// can be used as a dependency for any more nodes recorded from this
455+
// queue.
456+
GraphImpl.setLastInorderNode(*this, *NodeImpl);
457+
} else {
458+
ext::oneapi::experimental::detail::node_impl *LastBarrierRecordedFromQueue =
459+
GraphImpl.getBarrierDep(weak_from_this());
460+
std::vector<ext::oneapi::experimental::detail::node_impl *> Deps;
461+
462+
if (LastBarrierRecordedFromQueue) {
463+
Deps.push_back(LastBarrierRecordedFromQueue);
464+
}
465+
NodeImpl = &GraphImpl.add(NodeType, std::move(CommandGroup), Deps);
466+
467+
if (NodeImpl->MCGType == sycl::detail::CGType::Barrier) {
468+
GraphImpl.setBarrierDep(weak_from_this(), *NodeImpl);
469+
}
470+
}
471+
472+
// Associate an event with this new node and return the event.
473+
GraphImpl.addEventForNode(EventImpl, *NodeImpl);
474+
475+
return EventImpl;
476+
}
477+
423478
detail::EventImplPtr queue_impl::submit_kernel_direct_impl(
424479
const NDRDescT &NDRDesc, detail::HostKernelRefBase &HostKernel,
425480
detail::DeviceKernelInfo *DeviceKernelInfo, bool CallerNeedsEvent,
@@ -456,6 +511,11 @@ detail::EventImplPtr queue_impl::submit_kernel_direct_impl(
456511
CodeLoc));
457512
CommandGroup->MIsTopCodeLoc = IsTopCodeLoc;
458513

514+
if (auto GraphImpl = getCommandGraph(); GraphImpl) {
515+
return submit_command_to_graph(*GraphImpl, std::move(CommandGroup),
516+
detail::CGType::Kernel);
517+
}
518+
459519
return detail::Scheduler::getInstance().addCG(std::move(CommandGroup),
460520
*this, true);
461521
};

sycl/source/detail/queue_impl.hpp

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -624,6 +624,12 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
624624

625625
bool hasCommandGraph() const { return !MGraph.expired(); }
626626

627+
EventImplPtr submit_command_to_graph(
628+
ext::oneapi::experimental::detail::graph_impl &GraphImpl,
629+
std::unique_ptr<detail::CG> CommandGroup, sycl::detail::CGType CGType,
630+
sycl::ext::oneapi::experimental::node_type UserFacingNodeType =
631+
ext::oneapi::experimental::node_type::empty);
632+
627633
unsigned long long getQueueID() { return MQueueID; }
628634

629635
void *getTraceEvent() { return MTraceEvent; }

sycl/source/detail/scheduler/commands.cpp

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2468,14 +2468,16 @@ static ur_result_t SetKernelParamsAndLaunch(
24682468
/* pPropSizeRet = */ nullptr);
24692469

24702470
const bool EnforcedLocalSize =
2471-
(RequiredWGSize[0] != 0 || RequiredWGSize[1] != 0 ||
2472-
RequiredWGSize[2] != 0);
2471+
(RequiredWGSize[0] != 0 &&
2472+
(NDRDesc.Dims < 2 || RequiredWGSize[1] != 0) &&
2473+
(NDRDesc.Dims < 3 || RequiredWGSize[2] != 0));
24732474
if (EnforcedLocalSize)
24742475
LocalSize = RequiredWGSize;
24752476
}
2476-
const bool HasOffset = NDRDesc.GlobalOffset[0] != 0 ||
2477-
NDRDesc.GlobalOffset[1] != 0 ||
2478-
NDRDesc.GlobalOffset[2] != 0;
2477+
2478+
const bool HasOffset = NDRDesc.GlobalOffset[0] != 0 &&
2479+
(NDRDesc.Dims < 2 || NDRDesc.GlobalOffset[1] != 0) &&
2480+
(NDRDesc.Dims < 3 || NDRDesc.GlobalOffset[2] != 0);
24792481

24802482
std::vector<ur_kernel_launch_property_t> property_list;
24812483

@@ -2610,6 +2612,10 @@ ur_result_t enqueueImpCommandBufferKernel(
26102612
size_t RequiredWGSize[3] = {0, 0, 0};
26112613
size_t *LocalSize = nullptr;
26122614

2615+
const bool HasOffset = NDRDesc.GlobalOffset[0] != 0 &&
2616+
(NDRDesc.Dims < 2 || NDRDesc.GlobalOffset[1] != 0) &&
2617+
(NDRDesc.Dims < 3 || NDRDesc.GlobalOffset[2] != 0);
2618+
26132619
if (HasLocalSize)
26142620
LocalSize = &NDRDesc.LocalSize[0];
26152621
else {
@@ -2620,8 +2626,9 @@ ur_result_t enqueueImpCommandBufferKernel(
26202626
/* pPropSizeRet = */ nullptr);
26212627

26222628
const bool EnforcedLocalSize =
2623-
(RequiredWGSize[0] != 0 || RequiredWGSize[1] != 0 ||
2624-
RequiredWGSize[2] != 0);
2629+
(RequiredWGSize[0] != 0 &&
2630+
(NDRDesc.Dims < 2 || RequiredWGSize[1] != 0) &&
2631+
(NDRDesc.Dims < 3 || RequiredWGSize[2] != 0));
26252632
if (EnforcedLocalSize)
26262633
LocalSize = RequiredWGSize;
26272634
}
@@ -2637,7 +2644,8 @@ ur_result_t enqueueImpCommandBufferKernel(
26372644

26382645
ur_result_t Res =
26392646
Adapter.call_nocheck<UrApiKind::urCommandBufferAppendKernelLaunchExp>(
2640-
CommandBuffer, UrKernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0],
2647+
CommandBuffer, UrKernel, NDRDesc.Dims,
2648+
HasOffset ? &NDRDesc.GlobalOffset[0] : nullptr,
26412649
&NDRDesc.GlobalSize[0], LocalSize, AltUrKernels.size(),
26422650
AltUrKernels.size() ? AltUrKernels.data() : nullptr,
26432651
SyncPoints.size(), SyncPoints.size() ? SyncPoints.data() : nullptr, 0,

sycl/source/handler.cpp

Lines changed: 2 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -955,54 +955,8 @@ event handler::finalize() {
955955
// If the queue has an associated graph then we need to take the CG and pass
956956
// it to the graph to create a node, rather than submit it to the scheduler.
957957
if (auto GraphImpl = Queue->getCommandGraph(); GraphImpl) {
958-
auto EventImpl = detail::event_impl::create_completed_host_event();
959-
EventImpl->setSubmittedQueue(Queue->weak_from_this());
960-
ext::oneapi::experimental::detail::node_impl *NodeImpl = nullptr;
961-
962-
// GraphImpl is read and written in this scope so we lock this graph
963-
// with full privileges.
964-
ext::oneapi::experimental::detail::graph_impl::WriteLock Lock(
965-
GraphImpl->MMutex);
966-
967-
ext::oneapi::experimental::node_type NodeType =
968-
impl->MUserFacingNodeType != ext::oneapi::experimental::node_type::empty
969-
? impl->MUserFacingNodeType
970-
: ext::oneapi::experimental::detail::getNodeTypeFromCG(getType());
971-
972-
// Create a new node in the graph representing this command-group
973-
if (Queue->isInOrder()) {
974-
// In-order queues create implicit linear dependencies between nodes.
975-
// Find the last node added to the graph from this queue, so our new
976-
// node can set it as a predecessor.
977-
std::vector<ext::oneapi::experimental::detail::node_impl *> Deps;
978-
if (ext::oneapi::experimental::detail::node_impl *DependentNode =
979-
GraphImpl->getLastInorderNode(Queue)) {
980-
Deps.push_back(DependentNode);
981-
}
982-
NodeImpl = &GraphImpl->add(NodeType, std::move(CommandGroup), Deps);
983-
984-
// If we are recording an in-order queue remember the new node, so it
985-
// can be used as a dependency for any more nodes recorded from this
986-
// queue.
987-
GraphImpl->setLastInorderNode(*Queue, *NodeImpl);
988-
} else {
989-
ext::oneapi::experimental::detail::node_impl
990-
*LastBarrierRecordedFromQueue =
991-
GraphImpl->getBarrierDep(Queue->weak_from_this());
992-
std::vector<ext::oneapi::experimental::detail::node_impl *> Deps;
993-
994-
if (LastBarrierRecordedFromQueue) {
995-
Deps.push_back(LastBarrierRecordedFromQueue);
996-
}
997-
NodeImpl = &GraphImpl->add(NodeType, std::move(CommandGroup), Deps);
998-
999-
if (NodeImpl->MCGType == sycl::detail::CGType::Barrier) {
1000-
GraphImpl->setBarrierDep(Queue->weak_from_this(), *NodeImpl);
1001-
}
1002-
}
1003-
1004-
// Associate an event with this new node and return the event.
1005-
GraphImpl->addEventForNode(EventImpl, *NodeImpl);
958+
auto EventImpl = Queue->submit_command_to_graph(
959+
*GraphImpl, std::move(CommandGroup), type, impl->MUserFacingNodeType);
1006960

1007961
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
1008962
return EventImpl;

sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -626,8 +626,6 @@ TEST_F(CommandGraphTest, AccessorModeEdges) {
626626

627627
// Tests the transitive queue recording behaviour with queue shortcuts.
628628
TEST_F(CommandGraphTest, TransitiveRecordingShortcuts) {
629-
// Graphs not supported yet for the no-handler submit path
630-
#ifndef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
631629
device Dev;
632630
context Ctx{{Dev}};
633631
queue Q1{Ctx, Dev};
@@ -671,7 +669,6 @@ TEST_F(CommandGraphTest, TransitiveRecordingShortcuts) {
671669
ext::oneapi::experimental::queue_state::executing);
672670
ASSERT_EQ(Q3.ext_oneapi_get_state(),
673671
ext::oneapi::experimental::queue_state::executing);
674-
#endif
675672
}
676673

677674
// Tests that dynamic_work_group_memory.get() will throw on the host side.

0 commit comments

Comments
 (0)