Skip to content

Commit f63caae

Browse files
committed
aligh with new spec changes
1 parent 4c75261 commit f63caae

File tree

5 files changed

+93
-41
lines changed

5 files changed

+93
-41
lines changed

llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,9 @@ def AspectExt_oneapi_async_memory_alloc : Aspect<"ext_oneapi_async_memory_alloc"
9494
def AspectExt_intel_device_info_luid : Aspect<"ext_intel_device_info_luid">;
9595
def AspectExt_intel_device_info_node_mask : Aspect<"ext_intel_device_info_node_mask">;
9696
def Aspectext_oneapi_exportable_device_mem : Aspect<"ext_oneapi_exportable_device_mem">;
97-
def Aspectext_oneapi_clock : Aspect<"ext_oneapi_clock">;
97+
def Aspectext_oneapi_clock_sub_group : Aspect<"ext_oneapi_clock_sub_group">;
98+
def Aspectext_oneapi_clock_work_group : Aspect<"ext_oneapi_clock_work_group">;
99+
def Aspectext_oneapi_clock_device : Aspect<"ext_oneapi_clock_device">;
98100

99101
// Deprecated aspects
100102
def AspectInt64_base_atomics : Aspect<"int64_base_atomics">;
@@ -170,7 +172,9 @@ def : TargetInfo<"__TestAspectList",
170172
AspectExt_intel_device_info_luid,
171173
AspectExt_intel_device_info_node_mask,
172174
Aspectext_oneapi_exportable_device_mem,
173-
Aspectext_oneapi_clock],
175+
Aspectext_oneapi_clock_sub_group,
176+
Aspectext_oneapi_clock_work_group,
177+
Aspectext_oneapi_clock_device],
174178
[]>;
175179
// This definition serves the only purpose of testing whether the deprecated aspect list defined in here and in SYCL RT
176180
// match.

sycl/include/sycl/ext/oneapi/experimental/clock.hpp

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,14 @@ inline namespace _V1 {
1717
namespace ext::oneapi::experimental {
1818

1919
enum class clock_scope : int {
20-
// Aligned with SPIR-V Scope<id> values
20+
// Aligned with SPIR-V Scope<id> values.
2121
device = 1,
2222
work_group = 2,
2323
sub_group = 3
2424
};
2525

26-
#ifdef __SYCL_DEVICE_ONLY__
27-
[[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_clock)]]
28-
#endif // __SYCL_DEVICE_ONLY__
29-
inline uint64_t
30-
clock([[maybe_unused]] clock_scope scope = clock_scope::sub_group) {
26+
namespace detail {
27+
inline uint64_t clock_impl(clock_scope scope) {
3128
#ifdef __SYCL_DEVICE_ONLY__
3229
#if defined(__NVPTX__) || defined(__AMDGCN__)
3330
// Currently clock() is not supported on NVPTX and AMDGCN.
@@ -37,10 +34,41 @@ clock([[maybe_unused]] clock_scope scope = clock_scope::sub_group) {
3734
#endif // defined(__NVPTX__) || defined(__AMDGCN__)
3835
#else
3936
throw sycl::exception(
40-
make_error_code(errc::runtime),
41-
"sycl::ext::oneapi::experimental::clock() is not supported on host.");
37+
make_error_code(errc::runtime),
38+
"sycl::ext::oneapi::experimental::clock() is not supported on host.");
4239
#endif // __SYCL_DEVICE_ONLY__
4340
}
41+
} // namespace detail
42+
43+
template <clock_scope Scope>
44+
inline uint64_t clock();
45+
46+
// Specialization for device.
47+
template <>
48+
#ifdef __SYCL_DEVICE_ONLY__
49+
[[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_clock_device)]]
50+
#endif
51+
inline uint64_t clock<clock_scope::device>() {
52+
return detail::clock_impl(clock_scope::device);
53+
}
54+
55+
// Specialization for work-group.
56+
template <>
57+
#ifdef __SYCL_DEVICE_ONLY__
58+
[[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_clock_work_group)]]
59+
#endif
60+
inline uint64_t clock<clock_scope::work_group>() {
61+
return detail::clock_impl(clock_scope::work_group);
62+
}
63+
64+
// Specialization for sub-group.
65+
template <>
66+
#ifdef __SYCL_DEVICE_ONLY__
67+
[[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_clock_sub_group)]]
68+
#endif
69+
inline uint64_t clock<clock_scope::sub_group>() {
70+
return detail::clock_impl(clock_scope::sub_group);
71+
}
4472

4573
} // namespace ext::oneapi::experimental
4674
} // namespace _V1

sycl/include/sycl/info/aspects.def

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,5 +80,6 @@ __SYCL_ASPECT(ext_oneapi_async_memory_alloc, 87)
8080
__SYCL_ASPECT(ext_intel_device_info_luid, 88)
8181
__SYCL_ASPECT(ext_intel_device_info_node_mask, 89)
8282
__SYCL_ASPECT(ext_oneapi_exportable_device_mem, 90)
83-
__SYCL_ASPECT(ext_oneapi_clock, 91)
84-
83+
__SYCL_ASPECT(ext_oneapi_clock_sub_group, 91)
84+
__SYCL_ASPECT(ext_oneapi_clock_work_group, 92)
85+
__SYCL_ASPECT(ext_oneapi_clock_device, 93)

sycl/source/detail/device_impl.hpp

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#pragma once
1010

1111
#include <detail/helpers.hpp>
12+
#include <detail/kernel_compiler/kernel_compiler_opencl.hpp>
1213
#include <detail/platform_impl.hpp>
1314
#include <detail/program_manager/program_manager.hpp>
1415
#include <sycl/aspects.hpp>
@@ -1579,7 +1580,27 @@ class device_impl : public std::enable_shared_from_this<device_impl> {
15791580
UR_DEVICE_INFO_MEMORY_EXPORT_EXPORTABLE_DEVICE_MEM_EXP>()
15801581
.value_or(0);
15811582
}
1582-
CASE(ext_oneapi_clock) { return has_extension("cl_khr_kernel_clock"); }
1583+
else if constexpr (Aspect == aspect::ext_oneapi_clock_sub_group ||
1584+
Aspect == aspect::ext_oneapi_clock_work_group ||
1585+
Aspect == aspect::ext_oneapi_clock_device) {
1586+
detail::adapter_impl &Adapter = getAdapter();
1587+
uint32_t ipVersion = 0;
1588+
auto res = Adapter.call_nocheck<detail::UrApiKind::urDeviceGetInfo>(
1589+
getHandleRef(), UR_DEVICE_INFO_IP_VERSION, sizeof(uint32_t),
1590+
&ipVersion, nullptr);
1591+
if (res != UR_RESULT_SUCCESS)
1592+
return false;
1593+
std::string Feature;
1594+
if (Aspect == aspect::ext_oneapi_clock_sub_group) {
1595+
Feature = "__opencl_c_kernel_clock_scope_sub_group";
1596+
} else if (Aspect == aspect::ext_oneapi_clock_work_group) {
1597+
Feature = "__opencl_c_kernel_clock_scope_work_group";
1598+
} else if (Aspect == aspect::ext_oneapi_clock_device) {
1599+
Feature = "__opencl_c_kernel_clock_scope_device";
1600+
}
1601+
return ext::oneapi::experimental::detail::OpenCLC_Feature_Available(
1602+
std::string(Feature), ipVersion);
1603+
}
15831604
else {
15841605
return false; // This device aspect has not been implemented yet.
15851606
}
Lines changed: 26 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,41 @@
1-
// REQUIRES: aspect-ext_oneapi_clock, aspect-usm_shared_allocations
1+
// REQUIRES: aspect-usm_shared_allocations
22
// RUN: %{build} -o %t.out
33
// RUN: %{run} %t.out
44

55
#include <sycl/detail/core.hpp>
66
#include <sycl/ext/oneapi/experimental/clock.hpp>
77
#include <sycl/usm.hpp>
88

9-
int main() {
9+
namespace syclex = sycl::ext::oneapi::experimental;
10+
11+
template <syclex::clock_scope scope, sycl::aspect aspect> void test() {
1012
sycl::queue q;
11-
uint64_t *data = sycl::malloc_shared<uint64_t>(3, q);
12-
13-
q.single_task([=]() {
14-
uint64_t sg_clock_start = sycl::ext::oneapi::experimental::clock(
15-
sycl::ext::oneapi::experimental::clock_scope::sub_group);
16-
uint64_t wg_clock_start = sycl::ext::oneapi::experimental::clock(
17-
sycl::ext::oneapi::experimental::clock_scope::work_group);
18-
uint64_t dev_clock_start = sycl::ext::oneapi::experimental::clock(
19-
sycl::ext::oneapi::experimental::clock_scope::device);
20-
21-
int count = 0;
22-
for (int i = 0; i < 1e6; ++i)
23-
count++;
24-
25-
uint64_t sg_clock_end = sycl::ext::oneapi::experimental::clock(
26-
sycl::ext::oneapi::experimental::clock_scope::sub_group);
27-
uint64_t wg_clock_end = sycl::ext::oneapi::experimental::clock(
28-
sycl::ext::oneapi::experimental::clock_scope::work_group);
29-
uint64_t dev_clock_end = sycl::ext::oneapi::experimental::clock(
30-
sycl::ext::oneapi::experimental::clock_scope::device);
31-
data[0] = sg_clock_end - sg_clock_start;
32-
data[1] = wg_clock_end - wg_clock_start;
33-
data[2] = dev_clock_end - dev_clock_start;
13+
if (!q.get_device().has(aspect))
14+
return;
15+
16+
uint64_t *data = sycl::malloc_shared<uint64_t>(2, q);
17+
18+
q.parallel_for(2, [=](sycl::id<1> idx) {
19+
if (idx == 0) {
20+
data[0] = syclex::clock<scope>();
21+
int count = 0;
22+
for (int i = 0; i < 1e6; ++i)
23+
count++;
24+
data[1] = syclex::clock<scope>();
25+
}
3426
});
3527
q.wait();
3628

37-
assert(data[0] > 0);
38-
assert(data[1] > 0);
39-
assert(data[2] > 0);
29+
assert(data[1] > data[0]);
4030
sycl::free(data, q);
31+
}
32+
33+
int main() {
34+
test<syclex::clock_scope::sub_group,
35+
sycl::aspect::ext_oneapi_clock_sub_group>();
36+
test<syclex::clock_scope::work_group,
37+
sycl::aspect::ext_oneapi_clock_work_group>();
38+
test<syclex::clock_scope::device, sycl::aspect::ext_oneapi_clock_device>();
4139

4240
return 0;
4341
}

0 commit comments

Comments
 (0)