Skip to content

Commit cef1d17

Browse files
Add new --EnqueueFunctions flag to the SubmitKernel benchmark
Signed-off-by: James Brodman <[email protected]>
1 parent 242fc55 commit cef1d17

File tree

4 files changed

+19
-11
lines changed

4 files changed

+19
-11
lines changed

TESTS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ PhysicalMemDestroy|measures time spent in zePhysicalMemDestroy on CPU.|<ul></ul>
5252
ResetCommandList|measures time spent in zeCommandListReset on CPU.|<ul><li>--CopyOnly Create copy only cmdlist (0 or 1)</li><li>--size Size of the buffer</li><li>--sourcePlacement Placement of the source buffer (Device or Host or Shared or non-USM-mapped or non-USMmisaligned or non-USM4KBAligned or non-USM2MBAligned or non-USMmisaligned-imported or non-USM4KBAligned-imported or non-USM2MBAligned-imported)</li></ul>|:heavy_check_mark:|:x:|
5353
SetKernelArgSvmPointer|measures time spent in clSetKernelArgSVMPointer on CPU.|<ul><li>--allocationSize Size of svm allocations, in bytes</li><li>--allocationsCount Number of allocations</li><li>--reallocate Allocations will be freed and allocated again between setKernelArgs (0 or 1)</li></ul>|:heavy_check_mark:|:heavy_check_mark:|
5454
SetKernelGroupSize|measures time spent in zeKernelSetGroupSize on CPU.|<ul><li>--asymmetricLocalWorkSize Use asymmetric local workSize (0 or 1)</li></ul>|:heavy_check_mark:|:x:|
55-
SubmitKernel|measures time spent in submitting a kernel to a SYCL (or SYCL-like) queue on CPU.|<ul><li>--DiscardEvents Create the queue with the discard_events property (0 or 1)</li><li>--Ioq Create the queue with the in_order property (0 or 1)</li><li>--KernelExecTime Approximately how long a single kernel executes, in us</li><li>--MeasureCompletion Measures time taken to complete the submission (default is to measure only submit calls) (0 or 1)</li><li>--NumKernels Number of kernels to submit to the queue</li><li>--Profiling Create the queue with the enable_profiling property (0 or 1)</li></ul>|:heavy_check_mark:|:heavy_check_mark:|
55+
SubmitKernel|measures time spent in submitting a kernel to a SYCL (or SYCL-like) queue on CPU.|<ul><li>--DiscardEvents Create the queue with the discard_events property (0 or 1)</li><li>--EnqueueFunctions Use the eventless SYCL enqueue functions (0 or 1)</li><li>--Ioq Create the queue with the in_order property (0 or 1)</li><li>--KernelExecTime Approximately how long a single kernel executes, in us</li><li>--MeasureCompletion Measures time taken to complete the submission (default is to measure only submit calls) (0 or 1)</li><li>--NumKernels Number of kernels to submit to the queue</li><li>--Profiling Create the queue with the enable_profiling property (0 or 1)</li></ul>|:heavy_check_mark:|:heavy_check_mark:|
5656
UsmMemoryAllocation|measures time spent in USM memory allocation APIs.|<ul><li>--measureMode Specifies which APIs to measure (Allocate or Free or Both)</li><li>--size Size to allocate</li><li>--type Type of memory being allocated (Device or Host or Shared)</li></ul>|:heavy_check_mark:|:x:|
5757
VirtualMemFree|measures time spent in zeVirtualMemFree on CPU.|<ul><li>--freeSize Size in bytes to be freed</li></ul>|:heavy_check_mark:|:x:|
5858
VirtualMemGetAccessAttrib|measures time spent in zeVirtualMemGetAccessAttribute on CPU.|<ul><li>--size Size in bytes to get the access attribute</li></ul>|:heavy_check_mark:|:x:|

source/benchmarks/api_overhead_benchmark/definitions/submit_kernel.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2022-2023 Intel Corporation
2+
* Copyright (C) 2022-2025 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -13,6 +13,7 @@
1313
struct SubmitKernelArguments : TestCaseArgumentContainer {
1414
BooleanArgument useProfiling;
1515
BooleanArgument inOrderQueue;
16+
BooleanArgument useEnqueueFunctions;
1617
BooleanArgument discardEvents;
1718
PositiveIntegerArgument numKernels;
1819
PositiveIntegerArgument kernelExecutionTime;
@@ -21,6 +22,7 @@ struct SubmitKernelArguments : TestCaseArgumentContainer {
2122
SubmitKernelArguments()
2223
: useProfiling(*this, "Profiling", "Create the queue with the enable_profiling property"),
2324
inOrderQueue(*this, "Ioq", "Create the queue with the in_order property"),
25+
useEnqueueFunctions(*this, "EnqueueFunctions", "Use the eventless SYCL enqueue functions"),
2426
discardEvents(*this, "DiscardEvents", "Create the queue with the discard_events property"),
2527
numKernels(*this, "NumKernels", "Number of kernels to submit to the queue"),
2628
kernelExecutionTime(*this, "KernelExecTime", "Approximately how long a single kernel executes, in us"),

source/benchmarks/api_overhead_benchmark/gtest/submit_kernel.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2022-2023 Intel Corporation
2+
* Copyright (C) 2022-2025 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -14,18 +14,19 @@
1414

1515
[[maybe_unused]] static const inline RegisterTestCase<SubmitKernel> registerTestCase{};
1616

17-
class SubmitKernelTest : public ::testing::TestWithParam<std::tuple<Api, bool, bool, bool, size_t, size_t, bool>> {
17+
class SubmitKernelTest : public ::testing::TestWithParam<std::tuple<Api, bool, bool, bool, bool, size_t, size_t, bool>> {
1818
};
1919

2020
TEST_P(SubmitKernelTest, Test) {
2121
SubmitKernelArguments args{};
2222
args.api = std::get<0>(GetParam());
2323
args.useProfiling = std::get<1>(GetParam());
2424
args.inOrderQueue = std::get<2>(GetParam());
25-
args.discardEvents = std::get<3>(GetParam());
26-
args.numKernels = std::get<4>(GetParam());
27-
args.kernelExecutionTime = std::get<5>(GetParam());
28-
args.measureCompletionTime = std::get<6>(GetParam());
25+
args.useEnqueueFunctions = std::get<3>(GetParam());
26+
args.discardEvents = std::get<4>(GetParam());
27+
args.numKernels = std::get<5>(GetParam());
28+
args.kernelExecutionTime = std::get<6>(GetParam());
29+
args.measureCompletionTime = std::get<7>(GetParam());
2930
SubmitKernel test;
3031
test.run(args);
3132
}
@@ -37,7 +38,8 @@ INSTANTIATE_TEST_SUITE_P(
3738
::CommonGtestArgs::allApis(),
3839
::testing::Values(false), // useProfiling
3940
::testing::Values(false, true), // inOrderQueue
41+
::testing::Values(false, true), // useEnqueueFunctions
4042
::testing::Values(false, true), // discardEvents
4143
::testing::Values(10u), // numKernels
4244
::testing::Values(1u), // kernelExecutionTime
43-
::testing::Values(false, true))); // measureCompletionTime
45+
::testing::Values(false, true))); // measureCompletionTime

source/benchmarks/api_overhead_benchmark/implementations/sycl/submit_kernel_sycl.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2023-2024 Intel Corporation
2+
* Copyright (C) 2023-2025 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -72,7 +72,11 @@ static TestResult run(const SubmitKernelArguments &arguments, Statistics &statis
7272
for (auto i = 0u; i < arguments.iterations; i++) {
7373
timer.measureStart();
7474
for (auto iteration = 0u; iteration < arguments.numKernels; iteration++) {
75-
queue.parallel_for(range, eat_time);
75+
if (arguments.useEnqueueFunctions) {
76+
sycl::ext::oneapi::experimental::nd_launch(queue, range, eat_time);
77+
} else {
78+
queue.parallel_for(range, eat_time);
79+
}
7680
}
7781

7882
if (!arguments.measureCompletionTime) {

0 commit comments

Comments
 (0)