Skip to content
1 change: 1 addition & 0 deletions sycl/include/sycl/buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,7 @@ class buffer : public detail::buffer_plain,
buffer &operator=(buffer &&rhs) = default;

~buffer() {
CPOUT << "~buffer()" << std::endl;
try {
buffer_plain::handleRelease();
} catch (std::exception &e) {
Expand Down
4 changes: 4 additions & 0 deletions sycl/include/sycl/detail/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@
#include <type_traits> // for enable_if_t
#include <utility> // for index_sequence, make_i...

// CP
//#define CPOUT std::clog
#define CPOUT std::clog.rdbuf(NULL); std::clog

// Default signature enables the passing of user code location information to
// public methods as a default argument.
namespace sycl {
Expand Down
3 changes: 3 additions & 0 deletions sycl/include/sycl/kernel_bundle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,9 @@ kernel_bundle<State> get_kernel_bundle(const context &Ctx) {
return get_kernel_bundle<State>(Ctx, Ctx.get_devices());
}

// CP
__SYCL_EXPORT void test_release(sycl::context &Ctx, ur_native_handle_t NativeHandle);

namespace detail {

// Internal non-template versions of get_kernel_bundle API which is used by
Expand Down
2 changes: 2 additions & 0 deletions sycl/include/sycl/property_list.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include <type_traits> // for conditional_t, enable...
#include <vector> // for vector



namespace sycl {
inline namespace _V1 {
namespace ext::oneapi {
Expand Down
2 changes: 2 additions & 0 deletions sycl/source/detail/buffer_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,11 @@ class buffer_impl final : public SYCLMemObjT {
MemObjType getType() const override { return MemObjType::Buffer; }

~buffer_impl() {
CPOUT << "~buffer_impl" << std::endl;
try {
BaseT::updateHostMemory();
} catch (...) {
std::cout << "exception during updateHostMemory() called from ~buffer_impl" << std::endl;
}
destructorNotification(this);
}
Expand Down
8 changes: 8 additions & 0 deletions sycl/source/detail/context_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ context_impl::context_impl(const device &Device, async_handler AsyncHandler,
MContext(nullptr),
MPlatform(detail::getSyclObjImpl(Device.get_platform())),
MPropList(PropList), MSupportBufferLocationByDevices(NotChecked) {
// CP
CPOUT << "context_impl(dev, async, plist) constructor" << std::endl;
verifyProps(PropList);
MKernelProgramCache.setContextPtr(this);
}
Expand All @@ -43,6 +45,8 @@ context_impl::context_impl(const std::vector<sycl::device> Devices,
: MOwnedByRuntime(true), MAsyncHandler(AsyncHandler), MDevices(Devices),
MContext(nullptr), MPlatform(), MPropList(PropList),
MSupportBufferLocationByDevices(NotChecked) {
// CP
CPOUT << "context_impl(devices, async, plist) constructor" << std::endl;
verifyProps(PropList);
MPlatform = detail::getSyclObjImpl(MDevices[0].get_platform());
std::vector<ur_device_handle_t> DeviceIds;
Expand Down Expand Up @@ -76,6 +80,8 @@ context_impl::context_impl(ur_context_handle_t UrContext,
: MOwnedByRuntime(OwnedByRuntime), MAsyncHandler(AsyncHandler),
MDevices(DeviceList), MContext(UrContext), MPlatform(),
MSupportBufferLocationByDevices(NotChecked) {
// CP
CPOUT << "context_impl(UrContext, async, Adapter, DeviceList, OwnedByRuntime) constructor" << std::endl;
if (!MDevices.empty()) {
MPlatform = detail::getSyclObjImpl(MDevices[0].get_platform());
} else {
Expand Down Expand Up @@ -126,6 +132,8 @@ cl_context context_impl::get() const {
}

context_impl::~context_impl() {
// CP
CPOUT << "~context_impl() called" << std::endl;
try {
// Free all events associated with the initialization of device globals.
for (auto &DeviceGlobalInitializer : MDeviceGlobalInitializers)
Expand Down
2 changes: 2 additions & 0 deletions sycl/source/detail/context_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
#include <optional>
#include <set>



namespace sycl {
inline namespace _V1 {
// Forward declaration
Expand Down
1 change: 1 addition & 0 deletions sycl/source/detail/error_handling/error_handling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl,

for (size_t I = 0; I < 3; ++I) {
if (MaxThreadsPerBlock[I] < NDRDesc.LocalSize[I]) {
CPOUT << "---- THROWING ---- " << std::endl;
throw sycl::exception(make_error_code(errc::nd_range),
"The number of work-items in each dimension of a "
"work-group cannot exceed {" +
Expand Down
8 changes: 7 additions & 1 deletion sycl/source/detail/event_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ void event_impl::initContextIfNeeded() {
}

event_impl::~event_impl() {
// CP
CPOUT << "~event_impl() called" << std::endl;
try {
auto Handle = this->getHandle();
if (Handle)
Expand Down Expand Up @@ -145,7 +147,8 @@ void event_impl::setContextImpl(const ContextImplPtr &Context) {
event_impl::event_impl(ur_event_handle_t Event, const context &SyclContext)
: MEvent(Event), MContext(detail::getSyclObjImpl(SyclContext)),
MIsFlushed(true), MState(HES_Complete) {

// CP
CPOUT << "event_impl(ur_event_handle_t, context )" << std::endl;
ur_context_handle_t TempContext;
getAdapter()->call<UrApiKind::urEventGetInfo>(
this->getHandle(), UR_EVENT_INFO_CONTEXT, sizeof(ur_context_handle_t),
Expand All @@ -163,6 +166,9 @@ event_impl::event_impl(const QueueImplPtr &Queue)
: MQueue{Queue}, MIsProfilingEnabled{!Queue || Queue->MIsProfilingEnabled},
MFallbackProfiling{MIsProfilingEnabled && Queue &&
Queue->isProfilingFallback()} {
// CP
CPOUT << "event_impl(QueueImplPtr)" << std::endl;

if (Queue)
this->setContextImpl(Queue->getContextImplPtr());
else {
Expand Down
2 changes: 2 additions & 0 deletions sycl/source/detail/event_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ class event_impl {
// ONEAPI_DEVICE_SELECTOR. Deferring may lead to conficts with noexcept
// event methods. This ::get() call uses static vars to read and parse the
// ODS env var exactly once.
// CP
CPOUT << "event_impl<HES_Complte>() constructor" << std::endl;
SYCLConfig<ONEAPI_DEVICE_SELECTOR>::get();
}

Expand Down
46 changes: 24 additions & 22 deletions sycl/source/detail/global_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,6 @@ class ObjectUsageCounter {

LockGuard Guard(GlobalHandler::MSyclGlobalHandlerProtector);
MCounter--;
GlobalHandler *RTGlobalObjHandler = GlobalHandler::getInstancePtr();
if (RTGlobalObjHandler) {
RTGlobalObjHandler->prepareSchedulerToRelease(!MCounter);
}
} catch (std::exception &e) {
__SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~ObjectUsageCounter", e);
}
Expand Down Expand Up @@ -234,6 +230,8 @@ void GlobalHandler::releaseDefaultContexts() {
// Note that on Windows the destruction of the default context
// races with the detaching of the DLL object that calls urLoaderTearDown.

CPOUT << "releaseDefaultContext()" << std::endl;

MPlatformToDefaultContextCache.Inst.reset(nullptr);
}

Expand All @@ -242,7 +240,10 @@ struct EarlyShutdownHandler {
try {
#ifdef _WIN32
// on Windows we keep to the existing shutdown procedure
GlobalHandler::instance().releaseDefaultContexts();
//GlobalHandler::instance().endDeferredRelease();
//GlobalHandler::instance().releaseDefaultContexts();
//shutdown_early();
//shutdown_late();
#else
shutdown_early();
#endif
Expand Down Expand Up @@ -284,21 +285,22 @@ void GlobalHandler::unloadAdapters() {
}

void GlobalHandler::prepareSchedulerToRelease(bool Blocking) {
// CP - fix part 1
#ifndef _WIN32
if (Blocking)
drainThreadPool();
#endif
if (MScheduler.Inst)
MScheduler.Inst->releaseResources(Blocking ? BlockingT::BLOCKING
: BlockingT::NON_BLOCKING);
#endif
}

void GlobalHandler::drainThreadPool() {
if (MHostTaskThreadPool.Inst)
MHostTaskThreadPool.Inst->drain();
}

#ifdef _WIN32
//#ifdef _WIN32
// because of something not-yet-understood on Windows
// threads may be shutdown once the end of main() is reached
// making an orderly shutdown difficult. Fortunately, Windows
Expand All @@ -309,7 +311,7 @@ void shutdown_win() {
GlobalHandler *&Handler = GlobalHandler::getInstancePtr();
Handler->unloadAdapters();
}
#else
//#else
void shutdown_early() {
const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector};
GlobalHandler *&Handler = GlobalHandler::getInstancePtr();
Expand All @@ -321,8 +323,10 @@ void shutdown_early() {

// Ensure neither host task is working so that no default context is accessed
// upon its release
CPOUT << "shutdown_early() about to prepareSchedulerToRelease" << std::endl;
Handler->prepareSchedulerToRelease(true);

CPOUT << "shutdown_early() about to finishAndWait()" << std::endl;
if (Handler->MHostTaskThreadPool.Inst)
Handler->MHostTaskThreadPool.Inst->finishAndWait();

Expand Down Expand Up @@ -353,9 +357,18 @@ void shutdown_late() {
delete Handler;
Handler = nullptr;
}
#endif
//#endif

#ifdef _WIN32
// a simple wrapper to catch and stream any exception then continue
template <typename F>
void safe_call(F func) {
try {
func();
} catch (const std::exception& e) {
std::cerr << "exception in DllMain DLL_PROCESS_DETACH " << e.what() << std::endl;
}
}
extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL,
DWORD fdwReason,
LPVOID lpReserved) {
Expand All @@ -374,19 +387,8 @@ extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL,
if (PrintUrTrace)
std::cout << "---> DLL_PROCESS_DETACH syclx.dll\n" << std::endl;

#ifdef XPTI_ENABLE_INSTRUMENTATION
if (xptiTraceEnabled())
return TRUE; // When doing xpti tracing, we can't safely call shutdown.
// TODO: figure out what XPTI is doing that prevents
// release.
#endif

try {
shutdown_win();
} catch (std::exception &e) {
__SYCL_REPORT_EXCEPTION_TO_STREAM("exception in shutdown_win", e);
return FALSE;
}
safe_call([](){ shutdown_early(); });
safe_call([](){ shutdown_late(); });
break;
case DLL_PROCESS_ATTACH:
if (PrintUrTrace)
Expand Down
17 changes: 17 additions & 0 deletions sycl/source/detail/kernel_bundle_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,23 @@ class kernel_bundle_impl {
ContextImpl->getHandleRef(), spirv.data(), spirv.size(), nullptr,
&UrProgram);
// program created by urProgramCreateWithIL is implicitly retained.

// -------------------------------------
// CP - adding to try an force an imbalance
Adapter->call<detail::UrApiKind::urProgramRetain>(UrProgram);

// rebalance:
// this works.
// Adapter->call<detail::UrApiKind::urProgramRelease>(UrProgram);

// this ALSO works. So much for my theory.
detail::UrFuncInfo<detail::UrApiKind::urProgramRelease> programReleaseInfo;
auto programRelease = programReleaseInfo.getFuncPtrFromModule(detail::ur::getURLoaderLibrary());
programRelease(UrProgram);

// -------------------------------------


if (UrProgram == nullptr)
throw sycl::exception(
sycl::make_error_code(errc::invalid),
Expand Down
6 changes: 6 additions & 0 deletions sycl/source/detail/kernel_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ kernel_impl::kernel_impl(ur_kernel_handle_t Kernel, ContextImplPtr Context,
Context)),
MCreatedFromSource(true), MKernelBundleImpl(std::move(KernelBundleImpl)),
MIsInterop(true), MKernelArgMaskPtr{ArgMask} {
// CP
CPOUT << "kernel_impl(kernel, context, bundle, argmas) constructor" << std::endl;
ur_context_handle_t UrContext = nullptr;
// Using the adapter from the passed ContextImpl
getAdapter()->call<UrApiKind::urKernelGetInfo>(
Expand Down Expand Up @@ -53,10 +55,14 @@ kernel_impl::kernel_impl(ur_kernel_handle_t Kernel, ContextImplPtr ContextImpl,
MCreatedFromSource(false), MDeviceImageImpl(std::move(DeviceImageImpl)),
MKernelBundleImpl(std::move(KernelBundleImpl)),
MKernelArgMaskPtr{ArgMask}, MCacheMutex{CacheMutex} {
// CP
CPOUT << "kernel_impl(kernel, context, deviceimage, bundle, argmask, program, mutex) constructor" << std::endl;
MIsInterop = MKernelBundleImpl->isInterop();
}

kernel_impl::~kernel_impl() {
// CP
CPOUT << "~kernel_impl() called" << std::endl;
try {
// TODO catch an exception and put it to list of asynchronous exceptions
getAdapter()->call<UrApiKind::urKernelRelease>(MKernel);
Expand Down
13 changes: 13 additions & 0 deletions sycl/source/detail/kernel_program_cache.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,14 +112,20 @@ class KernelProgramCache {
struct ProgramBuildResult : public BuildResult<ur_program_handle_t> {
AdapterPtr Adapter;
ProgramBuildResult(const AdapterPtr &Adapter) : Adapter(Adapter) {
// CP
CPOUT << "ProgramBuildResult(adapter)" << std::endl;
Val = nullptr;
}
ProgramBuildResult(const AdapterPtr &Adapter, BuildState InitialState)
: Adapter(Adapter) {
// CP
CPOUT << "ProgramBuildResult(adapter, state)" << std::endl;
Val = nullptr;
this->State.store(InitialState);
}
~ProgramBuildResult() {
// CP
CPOUT << "~ProgramBuildResult()" << std::endl;
try {
if (Val) {
ur_result_t Err =
Expand All @@ -131,6 +137,9 @@ class KernelProgramCache {
e);
}
}
ProgramBuildResult() = delete;
ProgramBuildResult(const ProgramBuildResult&) = delete;
ProgramBuildResult& operator=(const ProgramBuildResult&) = delete;
};
using ProgramBuildResultPtr = std::shared_ptr<ProgramBuildResult>;

Expand Down Expand Up @@ -198,9 +207,13 @@ class KernelProgramCache {
struct KernelBuildResult : public BuildResult<KernelArgMaskPairT> {
AdapterPtr Adapter;
KernelBuildResult(const AdapterPtr &Adapter) : Adapter(Adapter) {
// CP
CPOUT << "KernelBuildResult(adapter)" << std::endl;
Val.first = nullptr;
}
~KernelBuildResult() {
// CP
CPOUT << "~KernelBuildResult()" << std::endl;
try {
if (Val.first) {
ur_result_t Err =
Expand Down
6 changes: 6 additions & 0 deletions sycl/source/detail/queue_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ class queue_impl {
MIsProfilingEnabled(has_property<property::queue::enable_profiling>()),
MQueueID{
MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} {
// CP
CPOUT << "queue_impl() constructor" << std::endl;
verifyProps(PropList);
if (has_property<property::queue::enable_profiling>()) {
if (has_property<ext::oneapi::property::queue::discard_events>())
Expand Down Expand Up @@ -232,6 +234,7 @@ class queue_impl {
MIsProfilingEnabled(has_property<property::queue::enable_profiling>()),
MQueueID{
MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} {
CPOUT << "queue_impl() interop constructor" << std::endl;
queue_impl_interop(UrQueue);
}

Expand All @@ -251,11 +254,14 @@ class queue_impl {
MIsProfilingEnabled(has_property<property::queue::enable_profiling>()),
MQueueID{
MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} {
CPOUT << "queue_impl() verify/interop constructor " << std::endl;
verifyProps(PropList);
queue_impl_interop(UrQueue);
}

~queue_impl() {
// CP
CPOUT << "~queue_impl() called" << std::endl;
try {
#if XPTI_ENABLE_INSTRUMENTATION
// The trace event created in the constructor should be active through the
Expand Down
Loading
Loading