diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index dbfffb5d490eb..b4f7c71af7d9f 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -94,6 +94,9 @@ def AspectExt_oneapi_async_memory_alloc : Aspect<"ext_oneapi_async_memory_alloc" def AspectExt_intel_device_info_luid : Aspect<"ext_intel_device_info_luid">; def AspectExt_intel_device_info_node_mask : Aspect<"ext_intel_device_info_node_mask">; def Aspectext_oneapi_exportable_device_mem : Aspect<"ext_oneapi_exportable_device_mem">; +def Aspectext_oneapi_clock_sub_group : Aspect<"ext_oneapi_clock_sub_group">; +def Aspectext_oneapi_clock_work_group : Aspect<"ext_oneapi_clock_work_group">; +def Aspectext_oneapi_clock_device : Aspect<"ext_oneapi_clock_device">; // Deprecated aspects def AspectInt64_base_atomics : Aspect<"int64_base_atomics">; @@ -168,7 +171,10 @@ def : TargetInfo<"__TestAspectList", AspectExt_oneapi_async_memory_alloc, AspectExt_intel_device_info_luid, AspectExt_intel_device_info_node_mask, - Aspectext_oneapi_exportable_device_mem], + Aspectext_oneapi_exportable_device_mem, + Aspectext_oneapi_clock_sub_group, + Aspectext_oneapi_clock_work_group, + Aspectext_oneapi_clock_device], []>; // This definition serves the only purpose of testing whether the deprecated aspect list defined in here and in SYCL RT // match. diff --git a/sycl/include/sycl/ext/oneapi/experimental/clock.hpp b/sycl/include/sycl/ext/oneapi/experimental/clock.hpp new file mode 100644 index 0000000000000..f0cf05b2b3bd9 --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/experimental/clock.hpp @@ -0,0 +1,74 @@ +//==-------- clock.hpp --- SYCL extension for clock() free function --------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental { + +enum class clock_scope : int { + // Aligned with SPIR-V Scope values. + device = 1, + work_group = 2, + sub_group = 3 +}; + +namespace detail { +template inline uint64_t clock_impl() { +#ifdef __SYCL_DEVICE_ONLY__ +#if defined(__NVPTX__) || defined(__AMDGCN__) + // Currently clock() is not supported on NVPTX and AMDGCN. + return 0; +#else + return __spirv_ReadClockKHR(static_cast(Scope)); +#endif // defined(__NVPTX__) || defined(__AMDGCN__) +#else + throw sycl::exception( + make_error_code(errc::runtime), + "sycl::ext::oneapi::experimental::clock() is not supported on host."); +#endif // __SYCL_DEVICE_ONLY__ +} +} // namespace detail + +template inline uint64_t clock(); + +// Specialization for device. +template <> +#ifdef __SYCL_DEVICE_ONLY__ +[[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_clock_device)]] +#endif +inline uint64_t clock() { + return detail::clock_impl(); +} + +// Specialization for work-group. +template <> +#ifdef __SYCL_DEVICE_ONLY__ +[[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_clock_work_group)]] +#endif +inline uint64_t clock() { + return detail::clock_impl(); +} + +// Specialization for sub-group. +template <> +#ifdef __SYCL_DEVICE_ONLY__ +[[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_clock_sub_group)]] +#endif +inline uint64_t clock() { + return detail::clock_impl(); +} + +} // namespace ext::oneapi::experimental +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/info/aspects.def b/sycl/include/sycl/info/aspects.def index 00816611233d2..d3e97a47a0248 100644 --- a/sycl/include/sycl/info/aspects.def +++ b/sycl/include/sycl/info/aspects.def @@ -80,4 +80,6 @@ __SYCL_ASPECT(ext_oneapi_async_memory_alloc, 87) __SYCL_ASPECT(ext_intel_device_info_luid, 88) __SYCL_ASPECT(ext_intel_device_info_node_mask, 89) __SYCL_ASPECT(ext_oneapi_exportable_device_mem, 90) - +__SYCL_ASPECT(ext_oneapi_clock_sub_group, 91) +__SYCL_ASPECT(ext_oneapi_clock_work_group, 92) +__SYCL_ASPECT(ext_oneapi_clock_device, 93) diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index 56d8fceb34dc2..a09870dd77c30 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -113,6 +113,7 @@ can be disabled by setting SYCL_DISABLE_FSYCL_SYCLHPP_WARNING macro.") #include #include #include +#include #include #include #include diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index e46633d9fab45..38214254595c6 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -1579,6 +1579,18 @@ class device_impl : public std::enable_shared_from_this { UR_DEVICE_INFO_MEMORY_EXPORT_EXPORTABLE_DEVICE_MEM_EXP>() .value_or(0); } + CASE(ext_oneapi_clock_sub_group) { + // Will be updated in a follow-up UR patch. + return false; + } + CASE(ext_oneapi_clock_work_group) { + // Will be updated in a follow-up UR patch. + return false; + } + CASE(ext_oneapi_clock_device) { + // Will be updated in a follow-up UR patch. + return false; + } else { return false; // This device aspect has not been implemented yet. } diff --git a/sycl/source/feature_test.hpp.in b/sycl/source/feature_test.hpp.in index 5b5cab4e0fc48..86040d75db03a 100644 --- a/sycl/source/feature_test.hpp.in +++ b/sycl/source/feature_test.hpp.in @@ -120,6 +120,7 @@ inline namespace _V1 { #define SYCL_KHR_FREE_FUNCTION_COMMANDS 1 #define SYCL_KHR_QUEUE_EMPTY_QUERY 1 #define SYCL_EXT_ONEAPI_MEMORY_EXPORT 1 +#define SYCL_EXT_ONEAPI_CLOCK 1 // In progress yet #define SYCL_EXT_ONEAPI_ATOMIC16 0 #define SYCL_KHR_DEFAULT_CONTEXT 1 diff --git a/sycl/test-e2e/Experimental/clock.cpp b/sycl/test-e2e/Experimental/clock.cpp new file mode 100644 index 0000000000000..604900d87294a --- /dev/null +++ b/sycl/test-e2e/Experimental/clock.cpp @@ -0,0 +1,42 @@ +// REQUIRES: aspect-usm_shared_allocations +// REQUIRES: aspect-ext_oneapi_clock_sub_group || aspect-ext_oneapi_clock_work_group || aspect-ext_oneapi_clock_device +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include +#include + +namespace syclex = sycl::ext::oneapi::experimental; + +template void test() { + sycl::queue q; + if (!q.get_device().has(aspect)) + return; + + uint64_t *data = sycl::malloc_shared(2, q); + + q.parallel_for(2, [=](sycl::id<1> idx) { + if (idx == 0) { + data[0] = syclex::clock(); + int count = 0; + for (int i = 0; i < 1e6; ++i) + count++; + data[1] = syclex::clock(); + } + }); + q.wait(); + + assert(data[1] > data[0]); + sycl::free(data, q); +} + +int main() { + test(); + test(); + test(); + + return 0; +}