Skip to content

Commit 42fc5e9

Browse files
[SYCL] Implement sycl_ext_oneapi_profiling_tag extension (intel#12838)
This commit adds the implementation of the sycl_ext_oneapi_profiling_tag extension. Moving the extension to experimental will happen in a follow-up patch. --------- Signed-off-by: Larsen, Steffen <[email protected]>
1 parent da379ec commit 42fc5e9

34 files changed

+407
-5
lines changed

llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ def AspectExt_oneapi_graph : Aspect<"ext_oneapi_graph">;
8181
def AspectExt_intel_fpga_task_sequence : Aspect<"ext_intel_fpga_task_sequence">;
8282
def AspectExt_oneapi_limited_graph : Aspect<"ext_oneapi_limited_graph">;
8383
def AspectExt_oneapi_private_alloca : Aspect<"ext_oneapi_private_alloca">;
84+
def AspectExt_oneapi_queue_profiling_tag : Aspect<"ext_oneapi_queue_profiling_tag">;
8485
// Deprecated aspects
8586
def AspectInt64_base_atomics : Aspect<"int64_base_atomics">;
8687
def AspectInt64_extended_atomics : Aspect<"int64_extended_atomics">;
@@ -138,8 +139,8 @@ def : TargetInfo<"__TestAspectList",
138139
AspectExt_oneapi_ballot_group, AspectExt_oneapi_fixed_size_group, AspectExt_oneapi_opportunistic_group,
139140
AspectExt_oneapi_tangle_group, AspectExt_intel_matrix, AspectExt_oneapi_is_composite, AspectExt_oneapi_is_component,
140141
AspectExt_oneapi_graph, AspectExt_intel_fpga_task_sequence, AspectExt_oneapi_limited_graph,
141-
AspectExt_oneapi_private_alloca],
142-
[]>;
142+
AspectExt_oneapi_private_alloca, AspectExt_oneapi_queue_profiling_tag],
143+
[]>;
143144
// This definition serves the only purpose of testing whether the deprecated aspect list defined in here and in SYCL RT
144145
// match.
145146
def : TargetInfo<"__TestDeprecatedAspectList",

sycl/include/sycl/detail/cg.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ class CG {
7878
CopyImage = 23,
7979
SemaphoreWait = 24,
8080
SemaphoreSignal = 25,
81+
ProfilingTag = 26,
8182
};
8283

8384
struct StorageInitHelper {
@@ -344,6 +345,12 @@ class CGBarrier : public CG {
344345
MEventsWaitWithBarrier(std::move(EventsWaitWithBarrier)) {}
345346
};
346347

348+
class CGProfilingTag : public CG {
349+
public:
350+
CGProfilingTag(CG::StorageInitHelper CGData, detail::code_location loc = {})
351+
: CG(CG::ProfilingTag, std::move(CGData), std::move(loc)) {}
352+
};
353+
347354
/// "Copy 2D USM" command group class.
348355
class CGCopy2DUSM : public CG {
349356
void *MSrc;

sycl/include/sycl/detail/pi.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ _PI_API(piEventRetain)
100100
_PI_API(piEventRelease)
101101
_PI_API(piextEventGetNativeHandle)
102102
_PI_API(piextEventCreateWithNativeHandle)
103+
_PI_API(piEnqueueTimestampRecordingExp)
103104
// Sampler
104105
_PI_API(piSamplerCreate)
105106
_PI_API(piSamplerGetInfo)

sycl/include/sycl/detail/pi.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,9 +174,11 @@
174174
// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D
175175
// 15.51 Removed ret_mem argument from piextMemUnsampledImageCreate and
176176
// piextMemSampledImageCreate
177+
// 15.52 Added piEnqueueTimestampRecordingExp and
178+
// PI_EXT_ONEAPI_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT.
177179

178180
#define _PI_H_VERSION_MAJOR 15
179-
#define _PI_H_VERSION_MINOR 51
181+
#define _PI_H_VERSION_MINOR 52
180182

181183
#define _PI_STRING_HELPER(a) #a
182184
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
@@ -484,6 +486,9 @@ typedef enum {
484486
PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D = 0x2011A,
485487
PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM = 0x2011B,
486488
PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D = 0x2011C,
489+
490+
// Timestamp enqueue
491+
PI_EXT_ONEAPI_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT = 0x2011D,
487492
} _pi_device_info;
488493

489494
typedef enum {
@@ -1774,6 +1779,10 @@ __SYCL_EXPORT pi_result piEventRetain(pi_event event);
17741779

17751780
__SYCL_EXPORT pi_result piEventRelease(pi_event event);
17761781

1782+
__SYCL_EXPORT pi_result piEnqueueTimestampRecordingExp(
1783+
pi_queue queue, pi_bool blocking, pi_uint32 num_events_in_wait_list,
1784+
const pi_event *event_wait_list, pi_event *event);
1785+
17771786
/// Gets the native handle of a PI event object.
17781787
///
17791788
/// \param event is the PI event to get the native handle of.

sycl/include/sycl/device_aspect_macros.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,11 @@
376376
#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_3d__ 0
377377
#endif
378378

379+
#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_queue_profiling_tag__
380+
// __SYCL_ASPECT(ext_oneapi_queue_profiling_tag, 73)
381+
#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_queue_profiling_tag__ 0
382+
#endif
383+
379384
#ifndef __SYCL_ANY_DEVICE_HAS_host__
380385
// __SYCL_ASPECT(host, 0)
381386
#define __SYCL_ANY_DEVICE_HAS_host__ 0
@@ -740,3 +745,8 @@
740745
//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 72)
741746
#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_3d__ 0
742747
#endif
748+
749+
#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_queue_profiling_tag__
750+
// __SYCL_ASPECT(ext_oneapi_queue_profiling_tag, 73)
751+
#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_queue_profiling_tag__ 0
752+
#endif
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
//==--------- profiling_tag.hpp --- SYCL profiling tag extension -----------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include <sycl/aspects.hpp>
12+
#include <sycl/event.hpp>
13+
#include <sycl/handler.hpp>
14+
#include <sycl/properties/queue_properties.hpp>
15+
#include <sycl/queue.hpp>
16+
17+
namespace sycl {
18+
inline namespace _V1 {
19+
namespace ext::oneapi::experimental {
20+
21+
inline event submit_profiling_tag(queue &Queue,
22+
const sycl::detail::code_location &CodeLoc =
23+
sycl::detail::code_location::current()) {
24+
if (Queue.get_device().has(aspect::ext_oneapi_queue_profiling_tag)) {
25+
return Queue.submit(
26+
[=](handler &CGH) {
27+
sycl::detail::HandlerAccess::internalProfilingTagImpl(CGH);
28+
},
29+
CodeLoc);
30+
}
31+
32+
// If it is not supported natively on the device, we use another path if
33+
// profiling is enabled.
34+
if (!Queue.has_property<sycl::property::queue::enable_profiling>())
35+
throw sycl::exception(
36+
make_error_code(errc::invalid),
37+
"Device must either have aspect::ext_oneapi_queue_profiling_tag or the "
38+
"queue must have profiling enabled.");
39+
return Queue.ext_oneapi_submit_barrier();
40+
}
41+
42+
} // namespace ext::oneapi::experimental
43+
} // namespace _V1
44+
} // namespace sycl

sycl/include/sycl/handler.hpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ class stream_impl;
175175
template <typename DataT, int Dimensions, access::mode AccessMode,
176176
access::target AccessTarget, access::placeholder IsPlaceholder>
177177
class image_accessor;
178+
class HandlerAccess;
178179
template <typename RetType, typename Func, typename Arg>
179180
static Arg member_ptr_helper(RetType (Func::*)(Arg) const);
180181

@@ -3676,12 +3677,29 @@ class __SYCL_EXPORT handler {
36763677
// Set that an ND Range was used during a call to parallel_for
36773678
void setNDRangeUsed(bool Value);
36783679

3680+
inline void internalProfilingTagImpl() {
3681+
throwIfActionIsCreated();
3682+
setType(detail::CG::ProfilingTag);
3683+
}
3684+
3685+
friend class detail::HandlerAccess;
3686+
36793687
protected:
36803688
/// Registers event dependencies in this command group.
36813689
void depends_on(const detail::EventImplPtr &Event);
36823690
/// Registers event dependencies in this command group.
36833691
void depends_on(const std::vector<detail::EventImplPtr> &Events);
36843692
};
3693+
3694+
namespace detail {
3695+
class HandlerAccess {
3696+
public:
3697+
static void internalProfilingTagImpl(handler &Handler) {
3698+
Handler.internalProfilingTagImpl();
3699+
}
3700+
};
3701+
} // namespace detail
3702+
36853703
} // namespace _V1
36863704
} // namespace sycl
36873705

sycl/include/sycl/info/aspects.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,3 +67,4 @@ __SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d_usm, 69)
6767
__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d, 70)
6868
__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d_usm, 71)
6969
__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 72)
70+
__SYCL_ASPECT(ext_oneapi_queue_profiling_tag, 73)

sycl/include/sycl/sycl.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@
9696
#include <sycl/ext/oneapi/experimental/group_load_store.hpp>
9797
#include <sycl/ext/oneapi/experimental/opportunistic_group.hpp>
9898
#include <sycl/ext/oneapi/experimental/prefetch.hpp>
99+
#include <sycl/ext/oneapi/experimental/profiling_tag.hpp>
99100
#include <sycl/ext/oneapi/experimental/root_group.hpp>
100101
#include <sycl/ext/oneapi/experimental/tangle_group.hpp>
101102
#include <sycl/ext/oneapi/filter_selector.hpp>

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,14 @@ pi_result piextEventCreateWithNativeHandle(pi_native_handle NativeHandle,
631631
OwnNativeHandle, Event);
632632
}
633633

634+
pi_result piEnqueueTimestampRecordingExp(pi_queue Queue, pi_bool Blocking,
635+
pi_uint32 NumEventsInWaitList,
636+
const pi_event *EventWaitList,
637+
pi_event *Event) {
638+
return pi2ur::piEnqueueTimestampRecordingExp(
639+
Queue, Blocking, NumEventsInWaitList, EventWaitList, Event);
640+
}
641+
634642
pi_result piSamplerCreate(pi_context Context,
635643
const pi_sampler_properties *SamplerProperties,
636644
pi_sampler *RetSampler) {

0 commit comments

Comments
 (0)