@@ -112,7 +112,7 @@ InferenceRequest::InferenceRequest(
112112 SetPriority (0 );
113113 // Outer-most release callback to ensure a request has been taken, this
114114 // callback won't be invoked, if certain flags are set.
115- release_callbacks_.emplace_back (
115+ release_callbacks_.emplace_back (std::make_pair (
116116 [](std::unique_ptr<InferenceRequest>& request,
117117 const uint32_t flags) -> Status {
118118 if (flags & TRITONSERVER_REQUEST_RELEASE_RESCHEDULE) {
@@ -123,7 +123,8 @@ InferenceRequest::InferenceRequest(
123123 " configured to handle such a flag." );
124124 }
125125 return Status::Success;
126- });
126+ },
127+ false ));
127128}
128129
129130Status
@@ -476,9 +477,16 @@ InferenceRequest::Release(
476477{
477478 // Invoke the release callbacks added internally before releasing the
478479 // request to user provided callback.
479- for (auto it = request->release_callbacks_ .rbegin ();
480- it != request->release_callbacks_ .rend (); it++) {
481- RETURN_IF_ERROR ((*it)(request, release_flags));
480+
481+ // Invoke callbacks in reverse order. Evict internal callbacks for reusing
482+ // inference request object.
483+ auto & release_callbacks = request->release_callbacks_ ;
484+ for (int i = release_callbacks.size () - 1 ; i >= 0 ; --i) {
485+ auto [release_fn, is_internal] = release_callbacks[i];
486+ if (is_internal) {
487+ release_callbacks.erase (release_callbacks.begin () + i);
488+ }
 489+     RETURN_IF_ERROR (release_fn (request, release_flags));
482490 if (request == nullptr ) {
483491 return Status::Success;
484492 }
@@ -500,6 +508,7 @@ InferenceRequest::Release(
500508 " Failed to set released state" );
501509 void * userp = request->release_userp_ ;
502510 auto & release_fn = request->release_fn_ ;
 511+  // NOTE(review): leftover debug output? If intentional, std::endl is
 511+  // redundant/flushing in a stream log macro — drop it; consider removing
 511+  // or demoting this line before merge.
 511+  LOG_INFO << " userp " << userp;
503512 release_fn (
504513 reinterpret_cast <TRITONSERVER_InferenceRequest*>(request.release ()),
505514 release_flags, userp);
0 commit comments