Skip to content

Commit 35c037c

Browse files
authored
Add ability to Register a TeardownCallback to notify release of L0 resources (#333)
* Add ability to Register a TeardownCallback to notify release of L0 resources Signed-off-by: Neil R. Spruit <[email protected]>
1 parent 1a14f0d commit 35c037c

File tree

3 files changed

+209
-95
lines changed

3 files changed

+209
-95
lines changed

include/loader/ze_loader.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,41 @@ zelEnableTracingLayer();
9494
ZE_DLLEXPORT bool ZE_APICALL
9595
zelCheckIsLoaderInTearDown();
9696

97+
typedef void (*zel_loader_teardown_callback_t)();
98+
typedef void (*zel_application_teardown_callback_t)(uint32_t index);
99+
100+
/**
101+
* @brief Registers a teardown callback to be invoked during loader teardown.
102+
*
103+
* This function allows the application to register a callback function that will be called
104+
* when the loader is being torn down. The loader will also provide its own callback function
105+
* and assign an index to the registered callback.
106+
*
107+
* The application_callback is required to be a function that takes no arguments and returns void.
108+
* In addition, the application_callback should be thread-safe and not block to prevent deadlocking the
109+
* loader teardown process.
110+
*
111+
* For example, the application_callback used by the static loader is:
112+
* void staticLoaderTeardownCallback() {
113+
* loaderTeardownCallbackReceived = true;
114+
* }
115+
* The application_callback should provide a simple notification to the application that the loader is being torn down.
116+
*
117+
* @param[in] application_callback Application's callback function to be called during loader teardown.
118+
* @param[out] loader_callback Pointer to the loader's callback function.
119+
* @param[out] index Index assigned to the registered callback.
120+
*
121+
* @return
122+
* - ZE_RESULT_SUCCESS if the callback was successfully registered.
123+
* - Appropriate error code otherwise.
124+
*/
125+
ZE_DLLEXPORT ze_result_t ZE_APICALL
126+
zelRegisterTeardownCallback(
127+
zel_loader_teardown_callback_t application_callback, // [in] Application's callback function to be called during loader teardown
128+
zel_application_teardown_callback_t *loader_callback, // [out] Pointer to the loader's callback function
129+
uint32_t *index // [out] Index assigned to the registered callback
130+
);
131+
97132
///////////////////////////////////////////////////////////////////////////////
98133
/// @brief Exported function for Disabling the Tracing Layer During Runtime.
99134
///

source/lib/ze_lib.cpp

Lines changed: 163 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,37 @@ namespace ze_lib
2727
}
2828
}
2929
bool delayContextDestruction = false;
30+
bool loaderTeardownCallbackReceived = false;
31+
bool loaderTeardownRegistrationEnabled = false;
32+
33+
/// @brief Callback function to handle loader teardown events.
34+
///
35+
/// This function sets the `loaderTeardownCallbackReceived` flag to true,
36+
/// indicating that a loader teardown callback has been received.
37+
/// It is intended to be used as a static callback during the loader's
38+
/// teardown process.
39+
void staticLoaderTeardownCallback() {
40+
// Latch the notification; context_t::~context_t() and zelCheckIsLoaderInTearDown() read this flag.
loaderTeardownCallbackReceived = true;
41+
}
3042
#endif
43+
/**
44+
* @brief Removes a teardown callback from the context's callback registry.
45+
*
46+
* This function checks if a teardown callback with the specified index exists
47+
* in the context's teardownCallbacks map. If it exists, the callback is removed.
48+
*
49+
* @param index The unique identifier of the teardown callback to remove.
50+
*/
51+
void applicationTeardownCallback(uint32_t index) {
52+
std::lock_guard<std::mutex> lock(ze_lib::context->teardownCallbacksMutex);
53+
if (ze_lib::context->teardownCallbacks.find(index) != ze_lib::context->teardownCallbacks.end()) {
54+
if (ze_lib::context->debugTraceEnabled) {
55+
std::string message = "applicationTeardownCallback received for index: " + std::to_string(index);
56+
ze_lib::context->debug_trace_message(message, "");
57+
}
58+
ze_lib::context->teardownCallbacks.erase(index);
59+
}
60+
}
3161
bool destruction = false;
3262

3363
///////////////////////////////////////////////////////////////////////////////
@@ -40,9 +70,19 @@ namespace ze_lib
4070
__zedlllocal context_t::~context_t()
4171
{
4272
#ifdef DYNAMIC_LOAD_LOADER
73+
if (loaderTeardownRegistrationEnabled && !loaderTeardownCallbackReceived) {
74+
loaderTeardownCallback(loaderTeardownCallbackIndex);
75+
}
4376
if (loader) {
4477
FREE_DRIVER_LIBRARY( loader );
4578
}
79+
#else
80+
// Given the loader teardown, notify the registered callbacks that the loader is being torn down.
81+
for (auto &callback : teardownCallbacks) {
82+
callback.second();
83+
}
84+
// Clear the teardown callbacks map once the callbacks have been executed.
85+
teardownCallbacks.clear();
4686
#endif
4787
ze_lib::destruction = true;
4888
};
@@ -339,9 +379,29 @@ namespace ze_lib
339379
isInitialized = true;
340380
}
341381
#ifdef DYNAMIC_LOAD_LOADER
342-
if (!delayContextDestruction) {
343-
std::atexit(context_at_exit_destructor);
344-
}
382+
std::call_once(ze_lib::context->initTeardownCallbacksOnce, [this]() {
383+
if (!delayContextDestruction) {
384+
std::atexit(context_at_exit_destructor);
385+
}
386+
// Get the function pointer for zelRegisterTeardownCallback from the dynamic loader
387+
typedef ze_result_t (ZE_APICALL *zelRegisterTeardownCallback_t)(
388+
zel_loader_teardown_callback_t,
389+
zel_application_teardown_callback_t*,
390+
uint32_t*);
391+
auto pfnZelRegisterTeardownCallback = reinterpret_cast<zelRegisterTeardownCallback_t>(
392+
GET_FUNCTION_PTR(loader, "zelRegisterTeardownCallback"));
393+
if (pfnZelRegisterTeardownCallback != nullptr) {
394+
auto register_teardown_result = pfnZelRegisterTeardownCallback(staticLoaderTeardownCallback, &loaderTeardownCallback, &loaderTeardownCallbackIndex);
395+
if (register_teardown_result != ZE_RESULT_SUCCESS) {
396+
std::string message = "ze_lib Context Init() zelRegisterTeardownCallback failed with ";
397+
debug_trace_message(message, to_string(register_teardown_result));
398+
} else {
399+
loaderTeardownRegistrationEnabled = true;
400+
std::string message = "ze_lib Context Init() zelRegisterTeardownCallback completed for the static loader with";
401+
debug_trace_message(message, to_string(register_teardown_result));
402+
}
403+
}
404+
});
345405
#endif
346406
return result;
347407
}
@@ -393,6 +453,15 @@ zelSetDriverTeardown()
393453
{
394454
ze_result_t result = ZE_RESULT_SUCCESS;
395455
if (!ze_lib::destruction) {
456+
if (ze_lib::context) {
457+
// Given the driver teardown, notify the registered callbacks that the loader is being torn down.
458+
for (auto &callback : ze_lib::context->teardownCallbacks) {
459+
callback.second();
460+
}
461+
// Clear the registered callbacks now that they have been called.
462+
ze_lib::context->teardownCallbacks.clear();
463+
}
464+
396465
ze_lib::destruction = true;
397466
}
398467
return result;
@@ -408,117 +477,116 @@ zelSetDelayLoaderContextTeardown()
408477
#endif
409478
}
410479

411-
#ifdef DYNAMIC_LOAD_LOADER
412-
#define ZEL_STABILITY_CHECK_RESULT_SUCCESS 0
413-
#define ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_NULL 1
414-
#define ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_FAILED 2
415-
#define ZEL_STABILITY_CHECK_RESULT_EXCEPTION 3
416-
417-
/**
418-
* @brief Performs a stability check for the Level Zero loader.
419-
*
420-
* This function checks the stability of the Level Zero loader by verifying
421-
* the presence of the loader module, the validity of the `zeDriverGet` function
422-
* pointer, and the ability to retrieve driver information. The result of the
423-
* stability check is communicated through the provided promise.
424-
*
425-
* @param stabilityPromise A promise object used to communicate the result of
426-
* the stability check. The promise is set with one of
427-
* the following values:
428-
* - ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_NULL: The
429-
* `zeDriverGet` function pointer is invalid.
430-
* - ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_FAILED: The
431-
* loader failed to retrieve driver information.
432-
* - ZEL_STABILITY_CHECK_RESULT_EXCEPTION: An
433-
* exception occurred during the stability check.
434-
* - ZEL_STABILITY_CHECK_RESULT_SUCCESS: The stability
435-
* check was successful.
436-
*
437-
* @note If debug tracing is enabled, debug messages are logged for each failure
438-
* scenario.
439-
* @note If the Loader is completely torn down, this thread is expected to be killed
440-
* due to invalid memory access and the stability check will determine a failure.
441-
*
442-
* @exception This function catches all exceptions internally and does not throw.
443-
*/
444-
void stabilityCheck(std::promise<int> stabilityPromise) {
445-
try {
446-
if (!ze_lib::context->loaderDriverGet) {
447-
if (ze_lib::context->debugTraceEnabled) {
448-
std::string message = "LoaderDriverGet is a bad pointer. Exiting stability checker thread.";
449-
ze_lib::context->debug_trace_message(message, "");
450-
}
451-
stabilityPromise.set_value(ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_NULL);
452-
return;
453-
}
454-
455-
uint32_t driverCount = 0;
456-
ze_result_t result = ZE_RESULT_ERROR_UNINITIALIZED;
457-
result = ze_lib::context->loaderDriverGet(&driverCount, nullptr);
458-
if (result != ZE_RESULT_SUCCESS || driverCount == 0) {
459-
if (ze_lib::context->debugTraceEnabled) {
460-
std::string message = "Loader stability check failed. Exiting stability checker thread.";
461-
ze_lib::context->debug_trace_message(message, "");
462-
}
463-
stabilityPromise.set_value(ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_FAILED);
464-
return;
480+
/// @brief Registers a teardown callback function to be invoked during loader teardown.
481+
///
482+
/// This function allows an application to register a callback that will be called when the loader is being torn down.
483+
/// The loader provides a callback function pointer to the application, which the application should call to notify
484+
/// the loader that it is tearing down. The loader will then remove the application's callback from its list of registered callbacks.
485+
///
486+
/// @param[in] application_callback
487+
/// The application's callback function to be called during loader teardown. Must not be nullptr.
488+
/// @param[out] loader_callback
489+
/// Pointer to the loader's callback function. The application should call this function to notify the loader of teardown.
490+
/// @param[out] index
491+
/// Pointer to a uint32_t that will receive the index assigned to the registered callback.
492+
///
493+
/// @return
494+
/// - ZE_RESULT_SUCCESS: The callback was successfully registered.
495+
/// - ZE_RESULT_ERROR_INVALID_ARGUMENT: The application_callback parameter is nullptr.
496+
/// - ZE_RESULT_ERROR_UNINITIALIZED: The loader context is not initialized.
497+
ze_result_t ZE_APICALL
zelRegisterTeardownCallback(
    zel_loader_teardown_callback_t application_callback, // [in] Application's callback function to be called during loader teardown
    zel_application_teardown_callback_t *loader_callback, // [out] Pointer to the loader's callback function
    uint32_t *index // [out] Index assigned to the registered callback
) {
    // Both out-parameters are written below, so a null pointer for either one is
    // rejected the same way as a missing application callback.
    if (nullptr == application_callback || nullptr == loader_callback || nullptr == index) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }
    if (!ze_lib::context) {
        return ZE_RESULT_ERROR_UNINITIALIZED;
    }
    {
        std::lock_guard<std::mutex> lock(ze_lib::context->teardownCallbacksMutex);
        // Hand the loader's callback to the application so it can notify the loader
        // that it is tearing down; the loader then drops the application's callback
        // from its registry.
        *loader_callback = ze_lib::applicationTeardownCallback;
        // fetch_add returns the previous count, so the first registration receives
        // index 1. Using the returned value avoids a separate load of the counter.
        const uint32_t assignedIndex = ze_lib::context->teardownCallbacksCount.fetch_add(1) + 1;
        *index = assignedIndex;
        ze_lib::context->teardownCallbacks.emplace(assignedIndex, application_callback);
        if (ze_lib::context->debugTraceEnabled) {
            std::string message = "Registered teardown callback with index: " + std::to_string(assignedIndex);
            ze_lib::context->debug_trace_message(message, "");
        }
    }
    return ZE_RESULT_SUCCESS;
}
473-
#endif
474526

475-
/**
476-
* @brief Checks if the loader is in the process of tearing down.
477-
*
478-
* This function determines whether the loader is in a teardown state by
479-
* checking the destruction flag or the context pointer. If the loader is
480-
* dynamically loaded through the static loader code path, then it performs
481-
* an additional stability check using a separate thread that could be killed.
482-
*
483-
* @return true if the loader is in teardown based on the stack variables
484-
* or the stability check fails; false otherwise.
485-
*
486-
* @note If the macro DYNAMIC_LOAD_LOADER is defined, a stability checker
487-
* thread is launched to perform additional checks. Any exceptions
488-
* or errors during this process are logged if debug tracing is enabled.
489-
*/
527+
/// @brief Checks if the Level Zero loader is currently in the teardown state.
528+
///
529+
/// This function determines whether the loader is in the process of being destroyed or is otherwise
530+
/// unavailable for further API calls. It performs several checks, including:
531+
/// - Whether the loader's destruction flag is set or the context is null.
532+
/// - On Windows with dynamic loading, it checks for loader teardown notifications,
533+
/// registration status, and the stability of the loader by attempting to call `loaderDriverGet`.
534+
/// - If any of these checks indicate the loader is in teardown or unstable, the function returns true.
535+
///
536+
/// @return true if the loader is in teardown or unstable; false otherwise.
490537
bool ZE_APICALL
491538
zelCheckIsLoaderInTearDown() {
492539
if (ze_lib::destruction || ze_lib::context == nullptr) {
493540
return true;
494541
}
495542
#if defined(DYNAMIC_LOAD_LOADER) && defined(_WIN32)
496-
std::promise<int> stabilityPromise;
497-
std::future<int> resultFuture = stabilityPromise.get_future();
498-
int result = -1;
499-
try {
500-
// Launch the stability checker thread
501-
std::thread stabilityThread(stabilityCheck, std::move(stabilityPromise));
502-
result = resultFuture.get(); // Blocks until the result is available
503-
stabilityThread.join();
504-
} catch (const std::exception& e) {
505-
if (ze_lib::context->debugTraceEnabled) {
506-
std::string message = "Exception caught in parent thread: " + std::string(e.what());
507-
ze_lib::context->debug_trace_message(message, "");
508-
}
509-
} catch (...) {
543+
static bool loaderIsStable = true;
544+
if (!loaderIsStable) {
510545
if (ze_lib::context->debugTraceEnabled) {
511-
std::string message = "Unknown exception caught in parent thread.";
546+
std::string message = "Loader Teardown check failed before, exiting.";
512547
ze_lib::context->debug_trace_message(message, "");
513548
}
549+
return true;
514550
}
515-
if (result != ZEL_STABILITY_CHECK_RESULT_SUCCESS) {
551+
if (ze_lib::loaderTeardownCallbackReceived) {
516552
if (ze_lib::context->debugTraceEnabled) {
517-
std::string message = "Loader stability check failed with result: " + std::to_string(result);
553+
std::string message = "Loader Teardown Notification Received, loader in teardown state.";
518554
ze_lib::context->debug_trace_message(message, "");
519555
}
556+
loaderIsStable = false;
520557
return true;
521558
}
559+
if (!ze_lib::loaderTeardownRegistrationEnabled) {
560+
try {
561+
if (!ze_lib::context->loaderDriverGet) {
562+
if (ze_lib::context->debugTraceEnabled) {
563+
std::string message = "LoaderDriverGet is a bad pointer. Exiting stability checker.";
564+
ze_lib::context->debug_trace_message(message, "");
565+
}
566+
loaderIsStable = false;
567+
return true;
568+
}
569+
570+
uint32_t driverCount = 0;
571+
ze_result_t result = ZE_RESULT_ERROR_UNINITIALIZED;
572+
result = ze_lib::context->loaderDriverGet(&driverCount, nullptr);
573+
if (result != ZE_RESULT_SUCCESS || driverCount == 0) {
574+
if (ze_lib::context->debugTraceEnabled) {
575+
std::string message = "Loader stability check failed. Exiting stability checker.";
576+
ze_lib::context->debug_trace_message(message, "");
577+
}
578+
loaderIsStable = false;
579+
return true;
580+
}
581+
} catch (...) {
582+
if (ze_lib::context->debugTraceEnabled) {
583+
std::string message = "Loader stability check failed. Exception occurred.";
584+
ze_lib::context->debug_trace_message(message, "");
585+
}
586+
loaderIsStable = false;
587+
return true;
588+
}
589+
}
522590
#endif
523591
return false;
524592
}

source/lib/ze_lib.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "layers/zel_tracing_api.h"
1818
#include "layers/zel_tracing_ddi.h"
1919
#include "../utils/logging.h"
20+
#include "loader/ze_loader.h"
2021
#include "ze_util.h"
2122
#include <vector>
2223
#include <mutex>
@@ -175,12 +176,22 @@ namespace ze_lib
175176
bool debugTraceEnabled = false;
176177
bool dynamicTracingSupported = true;
177178
ze_pfnDriverGet_t loaderDriverGet = nullptr;
179+
std::atomic<uint32_t> teardownCallbacksCount{0};
180+
std::map<uint32_t, zel_loader_teardown_callback_t> teardownCallbacks;
181+
std::mutex teardownCallbacksMutex;
182+
#ifdef DYNAMIC_LOAD_LOADER
183+
std::once_flag initTeardownCallbacksOnce;
184+
zel_application_teardown_callback_t loaderTeardownCallback = nullptr;
185+
uint32_t loaderTeardownCallbackIndex = 0;
186+
#endif
178187
};
179188

180189
extern bool destruction;
181190
extern context_t *context;
182191
#ifdef DYNAMIC_LOAD_LOADER
183192
extern bool delayContextDestruction;
193+
extern bool loaderTeardownCallbackReceived;
194+
extern bool loaderTeardownRegistrationEnabled;
184195
#endif
185196

186197
} // namespace ze_lib

0 commit comments

Comments
 (0)