Skip to content

Commit 9f6c3ca

Browse files
authored
fix: Evict inference request internal release callback after invoking (#452)
1 parent 6e14357 commit 9f6c3ca

File tree

3 files changed

+21
-10
lines changed

3 files changed

+21
-10
lines changed

include/triton/core/tritonserver.h

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
// Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
//
33
// Redistribution and use in source and binary forms, with or without
44
// modification, are permitted provided that the following conditions
@@ -1041,7 +1041,8 @@ TRITONSERVER_InferenceRequestNew(
10411041
struct TRITONSERVER_Server* server, const char* model_name,
10421042
const int64_t model_version);
10431043

1044-
/// Delete an inference request object.
1044+
/// Delete an inference request object. The request object must be
1045+
/// released before deletion.
10451046
///
10461047
/// \param inference_request The request object.
10471048
/// \return a TRITONSERVER_Error indicating success or failure.

src/infer_request.cc

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -112,7 +112,7 @@ InferenceRequest::InferenceRequest(
112112
SetPriority(0);
113113
// Outer-most release callback to ensure a request has been taken, this
114114
// callback won't be invoked, if certain flags are set.
115-
release_callbacks_.emplace_back(
115+
release_callbacks_.emplace_back(std::make_pair(
116116
[](std::unique_ptr<InferenceRequest>& request,
117117
const uint32_t flags) -> Status {
118118
if (flags & TRITONSERVER_REQUEST_RELEASE_RESCHEDULE) {
@@ -123,7 +123,8 @@ InferenceRequest::InferenceRequest(
123123
"configured to handle such a flag.");
124124
}
125125
return Status::Success;
126-
});
126+
},
127+
false));
127128
}
128129

129130
Status
@@ -476,9 +477,16 @@ InferenceRequest::Release(
476477
{
477478
// Invoke the release callbacks added internally before releasing the
478479
// request to user provided callback.
479-
for (auto it = request->release_callbacks_.rbegin();
480-
it != request->release_callbacks_.rend(); it++) {
481-
RETURN_IF_ERROR((*it)(request, release_flags));
480+
481+
// Invoke callbacks in reverse order. Evict internal callbacks for reusing
482+
// inference request object.
483+
auto& release_callbacks = request->release_callbacks_;
484+
for (int i = release_callbacks.size() - 1; i >= 0; --i) {
485+
auto [release_fn, is_internal] = release_callbacks[i];
486+
if (is_internal) {
487+
release_callbacks.erase(release_callbacks.begin() + i);
488+
}
489+
release_fn(request, release_flags);
482490
if (request == nullptr) {
483491
return Status::Success;
484492
}
@@ -500,6 +508,7 @@ InferenceRequest::Release(
500508
"Failed to set released state");
501509
void* userp = request->release_userp_;
502510
auto& release_fn = request->release_fn_;
511+
LOG_INFO << "userp " << userp << std::endl;
503512
release_fn(
504513
reinterpret_cast<TRITONSERVER_InferenceRequest*>(request.release()),
505514
release_flags, userp);

src/infer_request.h

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -548,7 +548,7 @@ class InferenceRequest {
548548
// and they will be invoked in reversed order.
549549
Status AddInternalReleaseCallback(InternalReleaseFn&& callback)
550550
{
551-
release_callbacks_.emplace_back(std::move(callback));
551+
release_callbacks_.emplace_back(std::make_pair(std::move(callback), true));
552552
return Status::Success;
553553
}
554554

@@ -832,8 +832,9 @@ class InferenceRequest {
832832
TRITONSERVER_InferenceRequestReleaseFn_t release_fn_;
833833
void* release_userp_;
834834

835-
// Additional release callbacks invoked before 'release_fn_'.
836-
std::vector<InternalReleaseFn> release_callbacks_;
835+
// Additional release callbacks invoked before 'release_fn_'. Set boolean to
836+
// true if release callback is internal and should be evicted after invoking.
837+
std::vector<std::pair<InternalReleaseFn, bool>> release_callbacks_;
837838

838839
// Delegator to be invoked on sending responses.
839840
std::function<void(std::unique_ptr<InferenceResponse>&&, const uint32_t)>

0 commit comments

Comments
 (0)