Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
63 commits
Select commit Hold shift + click to select a range
decadb0
prepare checks
KseniyaTikhomirova Feb 26, 2025
c1fce6f
create event for host task
KseniyaTikhomirova Feb 26, 2025
766b6d4
further plan
KseniyaTikhomirova Feb 26, 2025
87adb1d
Merge branch 'sycl' into host_task_on_user_events
KseniyaTikhomirova Mar 6, 2025
8053df4
add EventHostSignal to UR API and generate sources
KseniyaTikhomirova Mar 6, 2025
00cbd9b
add method to adapters
KseniyaTikhomirova Mar 6, 2025
0025a40
add event signalling
KseniyaTikhomirova Mar 6, 2025
01584f3
enable legacy code
KseniyaTikhomirova Mar 7, 2025
eeb950c
keep host task as dependency since backend knows nothing about it
KseniyaTikhomirova Mar 7, 2025
e4611ef
setComplete logic
KseniyaTikhomirova Mar 7, 2025
e7bd3f3
fix format
KseniyaTikhomirova Mar 7, 2025
6946153
tiny test for signalling logic
KseniyaTikhomirova Mar 10, 2025
b23dac9
remove comments
KseniyaTikhomirova Mar 10, 2025
cd20087
fix queue sync
KseniyaTikhomirova Mar 12, 2025
d24c021
to save, not working, to try EnqueueEventsWait approach
KseniyaTikhomirova Mar 12, 2025
c66a520
align producesPiEvent and EventHandle presence for AllocaCommand
KseniyaTikhomirova Mar 14, 2025
67b39db
addition + ReleaseCommand assert
KseniyaTikhomirova Mar 14, 2025
4ef22db
more commands are covered
KseniyaTikhomirova Mar 18, 2025
c8013d7
covered the rest commands
KseniyaTikhomirova Mar 18, 2025
b988e7f
fixes
KseniyaTikhomirova Mar 18, 2025
27517b0
fix test
KseniyaTikhomirova Mar 18, 2025
f76c399
fix tests
KseniyaTikhomirova Mar 19, 2025
c20cfa9
remove redundant comment
KseniyaTikhomirova Mar 19, 2025
c593868
Merge branch 'alignHandleWithProducesPiEvent' into HT_user_events_1
KseniyaTikhomirova Mar 19, 2025
36774ac
WorkerContext update
KseniyaTikhomirova Mar 20, 2025
351b6fc
fix dependencies handling
KseniyaTikhomirova Mar 20, 2025
c5e15e9
udpate buffer command
KseniyaTikhomirova Mar 20, 2025
c1cd375
align producesPiEvent and EventHandle presence for AllocaCommand
KseniyaTikhomirova Mar 14, 2025
be78aeb
addition + ReleaseCommand assert
KseniyaTikhomirova Mar 14, 2025
a9c4190
Merge branch 'sycl' into HT_user_events_1
KseniyaTikhomirova Mar 21, 2025
0549e63
fix merge issues
KseniyaTikhomirova Mar 21, 2025
bb2565a
remove check from allocaCommand
KseniyaTikhomirova Mar 24, 2025
4797df6
fix update buffer commands check
KseniyaTikhomirova Mar 24, 2025
fbc0022
Merge branch 'alignHandleWithProducesPiEvent' into HT_user_events_1
KseniyaTikhomirova Mar 24, 2025
4d1f62d
turns on user events path to test
KseniyaTikhomirova Mar 24, 2025
9a29376
fix
KseniyaTikhomirova Mar 25, 2025
803a2f5
draft fix
KseniyaTikhomirova Mar 25, 2025
87ebd14
Merge branch 'alignHandleWithProducesPiEvent' into HT_user_events_1
KseniyaTikhomirova Mar 25, 2025
dc00694
check normal path
KseniyaTikhomirova Mar 26, 2025
18f2c18
fix lock specialization
KseniyaTikhomirova Mar 26, 2025
06fec7d
fix host profiling info
KseniyaTikhomirova Mar 26, 2025
1e36d65
use the right queue for memory migration
KseniyaTikhomirova Mar 26, 2025
17bf93a
fix second lock specialization
KseniyaTikhomirova Mar 27, 2025
29e73e0
fix mem migration check
KseniyaTikhomirova Mar 27, 2025
55ec497
Merge branch 'sycl' into HT_user_events_1
KseniyaTikhomirova Mar 27, 2025
2c56b94
enable path by default
KseniyaTikhomirova Mar 27, 2025
512d8cb
fix format
KseniyaTikhomirova Mar 27, 2025
83a6d12
test
KseniyaTikhomirova Mar 28, 2025
1851e28
signal host as usual
KseniyaTikhomirova Mar 31, 2025
6b61562
fix deadlock in UR L0 adapter
KseniyaTikhomirova Apr 1, 2025
c67ce60
fix 2nd hang
KseniyaTikhomirova Apr 2, 2025
1a0af48
Merge branch 'sycl' into host_task_user_events_way_forced
KseniyaTikhomirova Apr 4, 2025
f61336e
user events path can stably work with L0 v2 only
KseniyaTikhomirova Apr 4, 2025
a6d1ae8
set config properly
KseniyaTikhomirova Apr 7, 2025
f76aae5
remove default event signalling for v2 adapter
KseniyaTikhomirova Apr 7, 2025
718ad0a
Merge branch 'sycl' into host_task_user_events_way_forced
KseniyaTikhomirova Apr 8, 2025
0b7d67b
enable opencl
KseniyaTikhomirova Apr 10, 2025
d209042
Merge branch 'sycl' into host_task_user_events_way_forced
KseniyaTikhomirova Apr 17, 2025
0a1b491
disable events path for USM shared
KseniyaTikhomirova Apr 17, 2025
cc3a9ab
Merge branch 'backup' into host_task_user_events_way_forced
KseniyaTikhomirova Apr 17, 2025
f5fcbff
Merge branch 'sycl' into host_task_user_events_way_forced
KseniyaTikhomirova Apr 17, 2025
b5ced3f
Merge branch 'sycl' into host_task_user_events_way_forced
KseniyaTikhomirova May 5, 2025
0fad587
fairly regenerate ur sources
KseniyaTikhomirova May 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion sycl/source/detail/device_global_map_entry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@ DeviceGlobalUSMMem &DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(
"USM allocations should not be acquired for device_global with "
"device_image_scope property.");
const std::shared_ptr<context_impl> &CtxImpl = QueueImpl->getContextImplPtr();
const device_impl &DevImpl = QueueImpl->getDeviceImpl();
// DevImpl is not const since alignedAllocInternal may add mark about shared
// USM usage needed for host task handling.
device_impl &DevImpl = QueueImpl->getDeviceImpl();
std::lock_guard<std::mutex> Lock(MDeviceToUSMPtrMapMutex);

auto DGUSMPtr = MDeviceToUSMPtrMap.find({&DevImpl, CtxImpl.get()});
Expand Down
4 changes: 4 additions & 0 deletions sycl/source/detail/device_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,9 @@ class device_impl : public std::enable_shared_from_this<device_impl> {
/// Get device architecture
ext::oneapi::experimental::architecture getDeviceArch() const;

void setUSMAllocationPresent() { MSharedUSMAllocationPresent = true; }
bool isUSMAllocationPresent() const { return MSharedUSMAllocationPresent; }

private:
ur_device_handle_t MDevice = 0;
ur_device_type_t MType;
Expand All @@ -307,6 +310,7 @@ class device_impl : public std::enable_shared_from_this<device_impl> {
mutable ext::oneapi::experimental::architecture MDeviceArch{};
mutable std::once_flag MDeviceArchFlag;
std::pair<uint64_t, uint64_t> MDeviceHostBaseTime{0, 0};
bool MSharedUSMAllocationPresent{};
}; // class device_impl

} // namespace detail
Expand Down
35 changes: 23 additions & 12 deletions sycl/source/detail/event_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,14 @@ event_impl::event_impl(ur_event_handle_t Event, const context &SyclContext)
}
}

void event_impl::allocateHostProfilingInfo() {
MHostProfilingInfo.reset(new HostProfilingInfo());
if (!MHostProfilingInfo)
throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
"Out of host memory " +
codeToString(UR_RESULT_ERROR_OUT_OF_HOST_MEMORY));
}

event_impl::event_impl(const QueueImplPtr &Queue)
: MQueue{Queue}, MIsProfilingEnabled{!Queue || Queue->MIsProfilingEnabled},
MFallbackProfiling{MIsProfilingEnabled && Queue &&
Expand All @@ -167,12 +175,7 @@ event_impl::event_impl(const QueueImplPtr &Queue)
this->setContextImpl(Queue->getContextImplPtr());
else {
MState.store(HES_NotComplete);
MHostProfilingInfo.reset(new HostProfilingInfo());
if (!MHostProfilingInfo)
throw sycl::exception(
sycl::make_error_code(sycl::errc::runtime),
"Out of host memory " +
codeToString(UR_RESULT_ERROR_OUT_OF_HOST_MEMORY));
allocateHostProfilingInfo();
return;
}
MState.store(HES_Complete);
Expand Down Expand Up @@ -400,7 +403,7 @@ uint64_t event_impl::get_profiling_info<info::event_profiling::command_end>() {

template <> uint32_t event_impl::get_info<info::event::reference_count>() {
auto Handle = this->getHandle();
if (!MIsHostEvent && Handle) {
if (Handle) {
return get_event_info<info::event::reference_count>(Handle,
this->getAdapter());
}
Expand Down Expand Up @@ -496,18 +499,20 @@ void HostProfilingInfo::start() { StartTime = getTimestamp(); }
void HostProfilingInfo::end() { EndTime = getTimestamp(); }

ur_native_handle_t event_impl::getNative() {
if (isHost())
auto Handle = getHandle();
if (MIsHostEvent && !Handle)
return {};
initContextIfNeeded();

initContextIfNeeded();
auto Adapter = getAdapter();
auto Handle = getHandle();

if (MIsDefaultConstructed && !Handle) {
auto TempContext = MContext.get()->getHandleRef();
ur_event_native_properties_t NativeProperties{};
ur_event_handle_t UREvent = nullptr;
Adapter->call<UrApiKind::urEventCreateWithNativeHandle>(
0, TempContext, &NativeProperties, &UREvent);
Adapter->call<UrApiKind::urEventHostSignal>(UREvent);
this->setHandle(UREvent);
Handle = UREvent;
}
Expand Down Expand Up @@ -631,8 +636,14 @@ bool event_impl::isCompleted() {
void event_impl::setCommand(void *Cmd) {
MCommand = Cmd;
auto TypedCommand = static_cast<Command *>(Cmd);
if (TypedCommand)
MIsHostEvent = TypedCommand->getWorkerContext() == nullptr;
if (TypedCommand && TypedCommand->getWorkerContext() == nullptr)
markAsHost();
}

void event_impl::markAsHost() {
MIsHostEvent = true;
if (!MHostProfilingInfo)
allocateHostProfilingInfo();
}

} // namespace detail
Expand Down
3 changes: 3 additions & 0 deletions sycl/source/detail/event_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,8 @@ class event_impl {
return MEvent && MQueue.expired() && !MIsEnqueued && !MCommand;
}

void markAsHost();

protected:
// When instrumentation is enabled emits trace event for event wait begin and
// returns the telemetry event generated for the wait
Expand All @@ -361,6 +363,7 @@ class event_impl {
void instrumentationEpilog(void *TelementryEvent, const std::string &Name,
int32_t StreamID, uint64_t IId) const;
void checkProfilingPreconditions() const;
void allocateHostProfilingInfo();

std::atomic<ur_event_handle_t> MEvent = nullptr;
// Stores submission time of command associated with event
Expand Down
5 changes: 3 additions & 2 deletions sycl/source/detail/queue_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ void queue_impl::addEvent(const event &Event) {
const EventImplPtr &EImpl = getSyclObjImpl(Event);
assert(EImpl && "Event implementation is missing");
auto *Cmd = static_cast<Command *>(EImpl->getCommand());
if (Cmd != nullptr && EImpl->getHandle() == nullptr &&
if (Cmd != nullptr && (EImpl->getHandle() == nullptr || EImpl->isHost()) &&
!EImpl->isDiscarded()) {
std::weak_ptr<event_impl> EventWeakPtr{EImpl};
std::lock_guard<std::mutex> Lock{MMutex};
Expand Down Expand Up @@ -646,7 +646,8 @@ void queue_impl::wait(const detail::code_location &CodeLoc) {
EventImplWeakPtrIt->lock()) {
// A nullptr UR event indicates that urQueueFinish will not cover it,
// either because it's a host task event or an unenqueued one.
if (nullptr == EventImplSharedPtr->getHandle()) {
if (nullptr == EventImplSharedPtr->getHandle() ||
EventImplSharedPtr->isHost()) {
EventImplSharedPtr->wait(EventImplSharedPtr);
}
}
Expand Down
9 changes: 8 additions & 1 deletion sycl/source/detail/queue_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,8 @@ class queue_impl {
ur_native_handle_t nativeHandle = 0;
getAdapter()->call<UrApiKind::urQueueGetNativeHandle>(MQueue, nullptr,
&nativeHandle);
__SYCL_OCL_CALL(clRetainCommandQueue, ur::cast<cl_command_queue>(nativeHandle));
__SYCL_OCL_CALL(clRetainCommandQueue,
ur::cast<cl_command_queue>(nativeHandle));
return ur::cast<cl_command_queue>(nativeHandle);
}

Expand Down Expand Up @@ -682,6 +683,12 @@ class queue_impl {
return ResEvent;
}

bool nativeHostTaskHandling() {
return std::getenv("SYCL_ENABLE_USER_EVENTS_PATH") &&
!MDevice.isUSMAllocationPresent() &&
(MDevice.getBackend() == backend::ext_oneapi_level_zero);
}

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
// CMPLRLLVM-66082
// These methods are for accessing a member that should live in the
Expand Down
Loading
Loading