Skip to content

Commit 9bd3fbc

Browse files
authored
[SYCL] do not set zeroed GlobalOffset in kernel parameters (#20082)
Do not pass an all-zero GlobalOffset in the kernel launch parameters; pass a null pointer instead - the Unified Runtime layer handles a missing (null) offset correctly. Note that a similar optimization already exists in https://github.com/intel/llvm/blob/sycl/sycl/source/detail/scheduler/commands.cpp#L2477
1 parent 2c3f0a8 commit 9bd3fbc

File tree

11 files changed

+40
-24
lines changed

11 files changed

+40
-24
lines changed

sycl/source/detail/scheduler/commands.cpp

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2468,14 +2468,16 @@ static ur_result_t SetKernelParamsAndLaunch(
24682468
/* pPropSizeRet = */ nullptr);
24692469

24702470
const bool EnforcedLocalSize =
2471-
(RequiredWGSize[0] != 0 || RequiredWGSize[1] != 0 ||
2472-
RequiredWGSize[2] != 0);
2471+
(RequiredWGSize[0] != 0 &&
2472+
(NDRDesc.Dims < 2 || RequiredWGSize[1] != 0) &&
2473+
(NDRDesc.Dims < 3 || RequiredWGSize[2] != 0));
24732474
if (EnforcedLocalSize)
24742475
LocalSize = RequiredWGSize;
24752476
}
2476-
const bool HasOffset = NDRDesc.GlobalOffset[0] != 0 ||
2477-
NDRDesc.GlobalOffset[1] != 0 ||
2478-
NDRDesc.GlobalOffset[2] != 0;
2477+
2478+
const bool HasOffset = NDRDesc.GlobalOffset[0] != 0 &&
2479+
(NDRDesc.Dims < 2 || NDRDesc.GlobalOffset[1] != 0) &&
2480+
(NDRDesc.Dims < 3 || NDRDesc.GlobalOffset[2] != 0);
24792481

24802482
std::vector<ur_kernel_launch_property_t> property_list;
24812483

@@ -2610,6 +2612,10 @@ ur_result_t enqueueImpCommandBufferKernel(
26102612
size_t RequiredWGSize[3] = {0, 0, 0};
26112613
size_t *LocalSize = nullptr;
26122614

2615+
const bool HasOffset = NDRDesc.GlobalOffset[0] != 0 &&
2616+
(NDRDesc.Dims < 2 || NDRDesc.GlobalOffset[1] != 0) &&
2617+
(NDRDesc.Dims < 3 || NDRDesc.GlobalOffset[2] != 0);
2618+
26132619
if (HasLocalSize)
26142620
LocalSize = &NDRDesc.LocalSize[0];
26152621
else {
@@ -2620,8 +2626,9 @@ ur_result_t enqueueImpCommandBufferKernel(
26202626
/* pPropSizeRet = */ nullptr);
26212627

26222628
const bool EnforcedLocalSize =
2623-
(RequiredWGSize[0] != 0 || RequiredWGSize[1] != 0 ||
2624-
RequiredWGSize[2] != 0);
2629+
(RequiredWGSize[0] != 0 &&
2630+
(NDRDesc.Dims < 2 || RequiredWGSize[1] != 0) &&
2631+
(NDRDesc.Dims < 3 || RequiredWGSize[2] != 0));
26252632
if (EnforcedLocalSize)
26262633
LocalSize = RequiredWGSize;
26272634
}
@@ -2637,7 +2644,8 @@ ur_result_t enqueueImpCommandBufferKernel(
26372644

26382645
ur_result_t Res =
26392646
Adapter.call_nocheck<UrApiKind::urCommandBufferAppendKernelLaunchExp>(
2640-
CommandBuffer, UrKernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0],
2647+
CommandBuffer, UrKernel, NDRDesc.Dims,
2648+
HasOffset ? &NDRDesc.GlobalOffset[0] : nullptr,
26412649
&NDRDesc.GlobalSize[0], LocalSize, AltUrKernels.size(),
26422650
AltUrKernels.size() ? AltUrKernels.data() : nullptr,
26432651
SyncPoints.size(), SyncPoints.size() ? SyncPoints.data() : nullptr, 0,

unified-runtime/include/ur_api.h

Lines changed: 1 addition & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

unified-runtime/scripts/core/exp-command-buffer.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,7 @@ params:
353353
desc: "[in] Dimension of the kernel execution."
354354
- type: "const size_t*"
355355
name: pGlobalWorkOffset
356-
desc: "[in] Offset to use when executing kernel."
356+
desc: "[in][optional] Offset to use when executing kernel."
357357
- type: "const size_t*"
358358
name: pGlobalWorkSize
359359
desc: "[in] Global work size to use when executing kernel."

unified-runtime/source/adapters/cuda/command_buffer.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,15 @@ kernel_command_data::kernel_command_data(
109109
ur_kernel_handle_t *KernelAlternatives)
110110
: Kernel(Kernel), Params(Params), WorkDim(WorkDim) {
111111
const size_t CopySize = sizeof(size_t) * WorkDim;
112-
std::memcpy(GlobalWorkOffset, GlobalWorkOffsetPtr, CopySize);
113112
std::memcpy(GlobalWorkSize, GlobalWorkSizePtr, CopySize);
113+
114+
// GlobalWorkOffsetPtr may be nullptr
115+
if (GlobalWorkOffsetPtr) {
116+
std::memcpy(GlobalWorkOffset, GlobalWorkOffsetPtr, CopySize);
117+
} else {
118+
std::memset(GlobalWorkOffset, 0, sizeof(size_t) * 3);
119+
}
120+
114121
// Local work size may be nullptr
115122
if (LocalWorkSizePtr) {
116123
std::memcpy(LocalWorkSize, LocalWorkSizePtr, CopySize);

unified-runtime/source/adapters/hip/command_buffer.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,15 @@ ur_exp_command_buffer_command_handle_t_::
5555
: handle_base(), CommandBuffer(CommandBuffer), Kernel(Kernel), Node(Node),
5656
Params(Params), WorkDim(WorkDim) {
5757
const size_t CopySize = sizeof(size_t) * WorkDim;
58-
std::memcpy(GlobalWorkOffset, GlobalWorkOffsetPtr, CopySize);
5958
std::memcpy(GlobalWorkSize, GlobalWorkSizePtr, CopySize);
59+
60+
// GlobalWorkOffsetPtr may be nullptr
61+
if (GlobalWorkOffsetPtr) {
62+
std::memcpy(GlobalWorkOffset, GlobalWorkOffsetPtr, CopySize);
63+
} else {
64+
std::memset(GlobalWorkOffset, 0, sizeof(size_t) * 3);
65+
}
66+
6067
// Local work size may be nullptr
6168
if (LocalWorkSizePtr) {
6269
std::memcpy(LocalWorkSize, LocalWorkSizePtr, CopySize);

unified-runtime/source/adapters/mock/ur_mockddi.cpp

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

unified-runtime/source/loader/layers/validation/ur_valddi.cpp

Lines changed: 1 addition & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

unified-runtime/source/loader/ur_ldrddi.cpp

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

unified-runtime/source/loader/ur_libapi.cpp

Lines changed: 1 addition & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)