diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_throttled_wait.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_throttled_wait.asciidoc new file mode 100644 index 0000000000000..a831efd21c338 --- /dev/null +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_throttled_wait.asciidoc @@ -0,0 +1,165 @@ += sycl_ext_oneapi_throttled_wait + +:source-highlighter: coderay +:coderay-linenums-mode: table + +// This section needs to be after the document title. +:doctype: book +:toc2: +:toc: left +:encoding: utf-8 +:lang: en +:dpcpp: pass:[DPC++] +:endnote: —{nbsp}end{nbsp}note + +// Set the default source code type in this document to C++, +// for syntax highlighting purposes. This is needed because +// docbook uses c++ and html5 uses cpp. +:language: {basebackend@docbook:c++:cpp} + + +== Notice + +[%hardbreaks] +Copyright (C) 2024 Intel Corporation. All rights reserved. + +Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are trademarks +of The Khronos Group Inc. +OpenCL(TM) is a trademark of Apple Inc. used by permission by Khronos. + + +== Contact + +To report problems with this extension, please open a new issue at: + +https://github.com/intel/llvm/issues + + +== Dependencies + +This extension is written against the SYCL 2020 revision 8 specification. +All references below to the "core SYCL specification" or to section numbers in +the SYCL specification refer to that revision. + + +== Status + +This is an experimental extension specification, intended to provide early +access to features and gather community feedback. Interfaces defined in +this specification are implemented in DPC++, but they are not finalized +and may change incompatibly in future versions of DPC++ without prior notice. +*Shipping software products should not rely on APIs defined in +this specification.* + + +== Overview + +This extension adds simple APIs for an alternate "sleeping" wait implementation. +This is for scenarios (such as IoT) where one might want to trade a bit of +performance in exchange for having the host CPU be more available, not burning +cycles intently waiting. + +== Specification + +=== Additional Inclusion + +As throttled_wait is presently an experimental extension, it requires an +additional inclusion to use. + +```c++ +#include +#include + +// now the extension API is available +``` + +=== Feature test macro + +This extension provides a feature-test macro as described in the core SYCL +specification. +An implementation supporting this extension must predefine the macro +`SYCL_EXT_ONEAPI_THROTTLED_WAIT` +to one of the values defined in the table below. +Applications can test for the existence of this macro to determine if +the implementation supports this feature, or applications can test the macro's +value to determine which of the extension's features the implementation +supports. + +[%header,cols="1,5"] +|=== +|Value +|Description + +|1 +|The APIs of this experimental extension are not versioned, so the + feature-test macro always has this value. +|=== + +=== API of the extension + +This extension adds the following free functions, where sleep is one +of the types supported by std::chrono::duration (e.g. +std::chrono::milliseconds, std::chrono::microseconds, etc) + +For each of these calls, while waiting for the sycl object to +complete, the host process sleeps for the sleep duration paramater. + + +```c++ +namespace sycl::ext::oneapi::experimental { + +template +void ext_oneapi_throttled_wait(sycl::event& e, const std::chrono::duration& sleep); + +template +void ext_oneapi_throttled_wait((std::vector& eventList, const std::chrono::duration& sleep) + +template +void ext_oneapi_throttled_wait_and_throw(sycl::event& e, const std::chrono::duration& sleep) + +template +void ext_oneapi_throttled_wait_and_throw((std::vector& eventList, const std::chrono::duration& sleep) + + +} // namespace sycl::ext::oneapi::experimental +``` + + +== Example + +The following example demonstrates simple usage of this API. + +``` +#include +#include +namespace syclex = sycl::ext::oneapi::experimental; + +constexpr uint64_t N = 1000000000; // a very big N for looping. + +int main() { + sycl::queue q; + uint64_t a = 0; + + { + sycl::buffer buf(&a, sycl::range<1>(1)); + + sycl::event e = q.submit([&](sycl::handler &cgh) { + sycl::accessor acc(buf, cgh, sycl::read_write); + cgh.single_task([=]() { + for(long i = 0; i < N; i++) { + acc[0] = acc[0] + 1; + } + }); + }); +#ifdef SYCL_EXT_ONEAPI_THROTTLED_WAIT + syclex::ext_oneapi_throttled_wait(e, std::chrono::milliseconds(100)); +#else + e.wait(); +#endif + } // buffer goes out of scope, data copied back to 'a'. + + std::cout << "a: " << a << std::endl; + + return 0; +} +``` diff --git a/sycl/include/sycl/ext/oneapi/experimental/throttled_wait.hpp b/sycl/include/sycl/ext/oneapi/experimental/throttled_wait.hpp new file mode 100644 index 0000000000000..2b885939fb83b --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/experimental/throttled_wait.hpp @@ -0,0 +1,72 @@ +//===------- throttled_wait.hpp - sleeping implementation of wait ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +// The throttled_wait extension requires the inclusion of this header. +// If we instead want it to be included with sycl.hpp, then this defnition +// will need to be removed from here and +// added to llvm/sycl/source/feature_test.hpp.in instead. +#define SYCL_EXT_ONEAPI_THROTTLED_WAIT 1 + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental { + +template +void ext_oneapi_throttled_wait( + sycl::event &e, const std::chrono::duration &sleep) { + while (e.get_info() != + sycl::info::event_command_status::complete) { + std::this_thread::sleep_for(sleep); + } + e.wait(); +} + +template +void ext_oneapi_throttled_wait( + std::vector &eventList, + const std::chrono::duration &sleep) { + for (sycl::event &e : eventList) { + while (e.get_info() != + sycl::info::event_command_status::complete) { + std::this_thread::sleep_for(sleep); + } + e.wait(); + } +} + +template +void ext_oneapi_throttled_wait_and_throw( + sycl::event &e, const std::chrono::duration &sleep) { + while (e.get_info() != + sycl::info::event_command_status::complete) { + std::this_thread::sleep_for(sleep); + } + e.wait_and_throw(); +} + +template +void ext_oneapi_throttled_wait_and_throw( + std::vector &eventList, + const std::chrono::duration &sleep) { + for (sycl::event &e : eventList) { + while (e.get_info() != + sycl::info::event_command_status::complete) { + std::this_thread::sleep_for(sleep); + } + e.wait_and_throw(); + } +} + +} // namespace ext::oneapi::experimental +} // namespace _V1 +} // namespace sycl \ No newline at end of file diff --git a/sycl/test-e2e/ThrottledWait/test_ext_throttled_wait.cpp b/sycl/test-e2e/ThrottledWait/test_ext_throttled_wait.cpp new file mode 100644 index 0000000000000..cbef4c1d70272 --- /dev/null +++ b/sycl/test-e2e/ThrottledWait/test_ext_throttled_wait.cpp @@ -0,0 +1,111 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include + +namespace syclex = sycl::ext::oneapi::experimental; + +// a very big N for looping in long running kernel +constexpr uint64_t N = 1000000000; + +void test_wait_and_throw(sycl::queue &q) { + try { + sycl::event e = q.submit([&](sycl::handler &CGH) { + CGH.host_task([=]() { + throw std::runtime_error("Exception thrown from host_task."); + }); + }); + syclex::ext_oneapi_throttled_wait_and_throw(e, + std::chrono::milliseconds(100)); + + assert(false && + "We should not be here. Exception should have been thrown."); + } catch (std::runtime_error &e) { + assert(std::string(e.what()) == "Exception thrown from host_task."); + std::cout << "Caught exception: " << e.what() << std::endl; + } +} + +void test_wait(sycl::queue &q) { + // fast kernel + sycl::event fast = + q.submit([&](sycl::handler &cgh) { cgh.single_task([=]() {}); }); + syclex::ext_oneapi_throttled_wait(fast, std::chrono::milliseconds(100)); + + // slow kernel + uint64_t a = 0; + { + sycl::buffer buf(&a, sycl::range<1>(1)); + + sycl::event slow = q.submit([&](sycl::handler &cgh) { + sycl::accessor acc(buf, cgh, sycl::read_write); + cgh.single_task([=]() { + for (long i = 0; i < N; i++) { + acc[0] = acc[0] + 1; + } + }); + }); + syclex::ext_oneapi_throttled_wait(slow, std::chrono::milliseconds(100)); + } // buffer goes out of scope, data copied back to 'a'. + + std::cout << "a: " << a << std::endl; + assert(a == N); + + // Ensure compatible with discarded events. + auto DiscardedEvent = q.ext_oneapi_submit_barrier(); + syclex::ext_oneapi_throttled_wait(DiscardedEvent, + std::chrono::milliseconds(100)); +} + +std::vector create_event_list(sycl::queue &q) { + std::vector events; + sycl::event slow = q.submit([&](sycl::handler &cgh) { + cgh.single_task([=]() { + for (long i = 0; i < N; i++) { + } + }); + }); + events.push_back(slow); + + sycl::event fast = + q.submit([&](sycl::handler &cgh) { cgh.single_task([=]() {}); }); + events.push_back(fast); + + sycl::event DiscardedEvent = q.ext_oneapi_submit_barrier(); + events.push_back(DiscardedEvent); + + return events; +} + +void test_wait_event_list(sycl::queue &q) { + auto events = create_event_list(q); + syclex::ext_oneapi_throttled_wait(events, std::chrono::milliseconds(100)); +} + +void test_wait_and_throw_event_list(sycl::queue &q) { + auto events = create_event_list(q); + syclex::ext_oneapi_throttled_wait_and_throw(events, + std::chrono::milliseconds(100)); +} + +int main() { + auto asyncHandler = [](sycl::exception_list el) { + for (auto &e : el) { + std::rethrow_exception(e); + } + }; + sycl::queue q(asyncHandler); + +#ifdef SYCL_EXT_ONEAPI_THROTTLED_WAIT + test_wait(q); + test_wait_and_throw(q); + test_wait_event_list(q); + test_wait_and_throw_event_list(q); +#else + assert(false && + "SYCL_EXT_ONEAPI_THROTTLED_WAIT feature test macro not defined"); +#endif + + return 0; +} \ No newline at end of file