Skip to content

Commit bb2fd63

Browse files
committed
New extension for a wait that sleeps rather than running CPU full tilt, for IoT and similar applications
1 parent e2075c7 commit bb2fd63

File tree

3 files changed

+348
-0
lines changed

3 files changed

+348
-0
lines changed
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
= sycl_ext_oneapi_throttled_wait
2+
3+
:source-highlighter: coderay
4+
:coderay-linenums-mode: table
5+
6+
// This section needs to be after the document title.
7+
:doctype: book
8+
:toc2:
9+
:toc: left
10+
:encoding: utf-8
11+
:lang: en
12+
:dpcpp: pass:[DPC++]
13+
:endnote: —{nbsp}end{nbsp}note
14+
15+
// Set the default source code type in this document to C++,
16+
// for syntax highlighting purposes. This is needed because
17+
// docbook uses c++ and html5 uses cpp.
18+
:language: {basebackend@docbook:c++:cpp}
19+
20+
21+
== Notice
22+
23+
[%hardbreaks]
24+
Copyright (C) 2024 Intel Corporation. All rights reserved.
25+
26+
Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are trademarks
27+
of The Khronos Group Inc.
28+
OpenCL(TM) is a trademark of Apple Inc. used by permission by Khronos.
29+
30+
31+
== Contact
32+
33+
To report problems with this extension, please open a new issue at:
34+
35+
https://github.com/intel/llvm/issues
36+
37+
38+
== Dependencies
39+
40+
This extension is written against the SYCL 2020 revision 8 specification.
41+
All references below to the "core SYCL specification" or to section numbers in
42+
the SYCL specification refer to that revision.
43+
44+
45+
== Status
46+
47+
This is an experimental extension specification, intended to provide early
48+
access to features and gather community feedback. Interfaces defined in
49+
this specification are implemented in DPC++, but they are not finalized
50+
and may change incompatibly in future versions of DPC++ without prior notice.
51+
*Shipping software products should not rely on APIs defined in
52+
this specification.*
53+
54+
55+
== Overview
56+
57+
This extension adds simple APIs for an alternate "sleeping" wait implementation.
58+
This is for scenarios (such as IoT) where one might want to trade a bit of
59+
performance in exchange for having the host CPU be more available, not burning
60+
cycles intently waiting.
61+
62+
== Specification
63+
64+
=== Additional Inclusion
65+
66+
As throttled_wait is presently an experimental extension, it requires an
67+
additional inclusion to use.
68+
69+
```c++
70+
#include <sycl/sycl.hpp>
71+
#include <sycl/ext/oneapi/experimental/throttled_wait.hpp>
72+
73+
// now the extension API is available
74+
```
75+
76+
=== Feature test macro
77+
78+
This extension provides a feature-test macro as described in the core SYCL
79+
specification.
80+
An implementation supporting this extension must predefine the macro
81+
`SYCL_EXT_ONEAPI_THROTTLED_WAIT`
82+
to one of the values defined in the table below.
83+
Applications can test for the existence of this macro to determine if
84+
the implementation supports this feature, or applications can test the macro's
85+
value to determine which of the extension's features the implementation
86+
supports.
87+
88+
[%header,cols="1,5"]
89+
|===
90+
|Value
91+
|Description
92+
93+
|1
94+
|The APIs of this experimental extension are not versioned, so the
95+
feature-test macro always has this value.
96+
|===
97+
98+
=== API of the extension
99+
100+
This extension adds the following free functions, where sleep is one
101+
of the types supported by std::chrono::duration (e.g.
102+
std::chrono::milliseconds, std::chrono::microseconds, etc)
103+
104+
For each of these calls, while waiting for the sycl object to
105+
complete, the host process sleeps for the sleep duration paramater.
106+
107+
108+
```c++
109+
namespace sycl::ext::oneapi::experimental {
110+
111+
template <typename Rep, typename Period>
112+
void ext_oneapi_throttled_wait(sycl::event& e, const std::chrono::duration<Rep, Period>& sleep);
113+
114+
template <typename Rep, typename Period>
115+
void ext_oneapi_throttled_wait((std::vector<sycl::event>& eventList, const std::chrono::duration<Rep, Period>& sleep)
116+
117+
template <typename Rep, typename Period>
118+
void ext_oneapi_throttled_wait_and_throw(sycl::event& e, const std::chrono::duration<Rep, Period>& sleep)
119+
120+
template <typename Rep, typename Period>
121+
void ext_oneapi_throttled_wait_and_throw((std::vector<sycl::event>& eventList, const std::chrono::duration<Rep, Period>& sleep)
122+
123+
124+
} // namespace sycl::ext::oneapi::experimental
125+
```
126+
127+
128+
== Example
129+
130+
The following example demonstrates simple usage of this API.
131+
132+
```
133+
#include <sycl/sycl.hpp>
134+
#include <sycl/ext/oneapi/experimental/throttled_wait.hpp>
135+
namespace syclex = sycl::ext::oneapi::experimental;
136+
137+
constexpr uint64_t N = 1000000000; // a very big N for looping.
138+
139+
int main() {
140+
sycl::queue q;
141+
uint64_t a = 0;
142+
143+
{
144+
sycl::buffer<uint64_t, 1> buf(&a, sycl::range<1>(1));
145+
146+
sycl::event e = q.submit([&](sycl::handler &cgh) {
147+
sycl::accessor acc(buf, cgh, sycl::read_write);
148+
cgh.single_task<class hello_world>([=]() {
149+
for(long i = 0; i < N; i++) {
150+
acc[0] = acc[0] + 1;
151+
}
152+
});
153+
});
154+
#ifdef SYCL_EXT_ONEAPI_THROTTLED_WAIT
155+
syclex::ext_oneapi_throttled_wait(e, std::chrono::milliseconds(100));
156+
#else
157+
e.wait();
158+
#endif
159+
} // buffer goes out of scope, data copied back to 'a'.
160+
161+
std::cout << "a: " << a << std::endl;
162+
163+
return 0;
164+
}
165+
```
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
//===------- throttled_wait.hpp - sleeping implementation of wait ------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include <chrono>
12+
#include <thread>
13+
14+
// The throttled_wait extension requires the inclusion of this header.
15+
// If we instead want it to be included with sycl.hpp, then this defnition
16+
// will need to be removed from here and
17+
// added to llvm/sycl/source/feature_test.hpp.in instead.
18+
#define SYCL_EXT_ONEAPI_THROTTLED_WAIT 1
19+
20+
namespace sycl {
21+
inline namespace _V1 {
22+
namespace ext::oneapi::experimental {
23+
24+
template <typename Rep, typename Period>
25+
void ext_oneapi_throttled_wait(
26+
sycl::event &e, const std::chrono::duration<Rep, Period> &sleep) {
27+
while (e.get_info<sycl::info::event::command_execution_status>() !=
28+
sycl::info::event_command_status::complete) {
29+
std::this_thread::sleep_for(sleep);
30+
}
31+
e.wait();
32+
}
33+
34+
template <typename Rep, typename Period>
35+
void ext_oneapi_throttled_wait(
36+
std::vector<sycl::event> &eventList,
37+
const std::chrono::duration<Rep, Period> &sleep) {
38+
for (sycl::event &e : eventList) {
39+
while (e.get_info<sycl::info::event::command_execution_status>() !=
40+
sycl::info::event_command_status::complete) {
41+
std::this_thread::sleep_for(sleep);
42+
}
43+
e.wait();
44+
}
45+
}
46+
47+
template <typename Rep, typename Period>
48+
void ext_oneapi_throttled_wait_and_throw(
49+
sycl::event &e, const std::chrono::duration<Rep, Period> &sleep) {
50+
while (e.get_info<sycl::info::event::command_execution_status>() !=
51+
sycl::info::event_command_status::complete) {
52+
std::this_thread::sleep_for(sleep);
53+
}
54+
e.wait_and_throw();
55+
}
56+
57+
template <typename Rep, typename Period>
58+
void ext_oneapi_throttled_wait_and_throw(
59+
std::vector<sycl::event> &eventList,
60+
const std::chrono::duration<Rep, Period> &sleep) {
61+
for (sycl::event &e : eventList) {
62+
while (e.get_info<sycl::info::event::command_execution_status>() !=
63+
sycl::info::event_command_status::complete) {
64+
std::this_thread::sleep_for(sleep);
65+
}
66+
e.wait_and_throw();
67+
}
68+
}
69+
70+
} // namespace ext::oneapi::experimental
71+
} // namespace _V1
72+
} // namespace sycl
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
4+
#include <sycl/ext/oneapi/experimental/throttled_wait.hpp>
5+
#include <sycl/sycl.hpp>
6+
7+
namespace syclex = sycl::ext::oneapi::experimental;
8+
9+
// a very big N for looping in long running kernel
10+
constexpr uint64_t N = 1000000000;
11+
12+
void test_wait_and_throw(sycl::queue &q) {
13+
try {
14+
sycl::event e = q.submit([&](sycl::handler &CGH) {
15+
CGH.host_task([=]() {
16+
throw std::runtime_error("Exception thrown from host_task.");
17+
});
18+
});
19+
syclex::ext_oneapi_throttled_wait_and_throw(e,
20+
std::chrono::milliseconds(100));
21+
22+
assert(false &&
23+
"We should not be here. Exception should have been thrown.");
24+
} catch (std::runtime_error &e) {
25+
assert(std::string(e.what()) == "Exception thrown from host_task.");
26+
std::cout << "Caught exception: " << e.what() << std::endl;
27+
}
28+
}
29+
30+
void test_wait(sycl::queue &q) {
31+
// fast kernel
32+
sycl::event fast =
33+
q.submit([&](sycl::handler &cgh) { cgh.single_task([=]() {}); });
34+
syclex::ext_oneapi_throttled_wait(fast, std::chrono::milliseconds(100));
35+
36+
// slow kernel
37+
uint64_t a = 0;
38+
{
39+
sycl::buffer<uint64_t, 1> buf(&a, sycl::range<1>(1));
40+
41+
sycl::event slow = q.submit([&](sycl::handler &cgh) {
42+
sycl::accessor acc(buf, cgh, sycl::read_write);
43+
cgh.single_task<class hello_world>([=]() {
44+
for (long i = 0; i < N; i++) {
45+
acc[0] = acc[0] + 1;
46+
}
47+
});
48+
});
49+
syclex::ext_oneapi_throttled_wait(slow, std::chrono::milliseconds(100));
50+
} // buffer goes out of scope, data copied back to 'a'.
51+
52+
std::cout << "a: " << a << std::endl;
53+
assert(a == N);
54+
55+
// Ensure compatible with discarded events.
56+
auto DiscardedEvent = q.ext_oneapi_submit_barrier();
57+
syclex::ext_oneapi_throttled_wait(DiscardedEvent,
58+
std::chrono::milliseconds(100));
59+
}
60+
61+
std::vector<sycl::event> create_event_list(sycl::queue &q) {
62+
std::vector<sycl::event> events;
63+
sycl::event slow = q.submit([&](sycl::handler &cgh) {
64+
cgh.single_task([=]() {
65+
for (long i = 0; i < N; i++) {
66+
}
67+
});
68+
});
69+
events.push_back(slow);
70+
71+
sycl::event fast =
72+
q.submit([&](sycl::handler &cgh) { cgh.single_task([=]() {}); });
73+
events.push_back(fast);
74+
75+
sycl::event DiscardedEvent = q.ext_oneapi_submit_barrier();
76+
events.push_back(DiscardedEvent);
77+
78+
return events;
79+
}
80+
81+
void test_wait_event_list(sycl::queue &q) {
82+
auto events = create_event_list(q);
83+
syclex::ext_oneapi_throttled_wait(events, std::chrono::milliseconds(100));
84+
}
85+
86+
void test_wait_and_throw_event_list(sycl::queue &q) {
87+
auto events = create_event_list(q);
88+
syclex::ext_oneapi_throttled_wait_and_throw(events,
89+
std::chrono::milliseconds(100));
90+
}
91+
92+
int main() {
93+
auto asyncHandler = [](sycl::exception_list el) {
94+
for (auto &e : el) {
95+
std::rethrow_exception(e);
96+
}
97+
};
98+
sycl::queue q(asyncHandler);
99+
100+
#ifdef SYCL_EXT_ONEAPI_THROTTLED_WAIT
101+
test_wait(q);
102+
test_wait_and_throw(q);
103+
test_wait_event_list(q);
104+
test_wait_and_throw_event_list(q);
105+
#else
106+
assert(false &&
107+
"SYCL_EXT_ONEAPI_THROTTLED_WAIT feature test macro not defined");
108+
#endif
109+
110+
return 0;
111+
}

0 commit comments

Comments
 (0)