Skip to content

Commit 34f0486

Browse files
committed
Merge branch 'main' of https://github.com/triton-inference-server/core into spolisetty/tri-26-triton-dali-ensemble-model-memory-issue
2 parents 2cd6c7b + e813ef8 commit 34f0486

File tree

2 files changed

+10
-5
lines changed

2 files changed

+10
-5
lines changed

src/infer_request.cc

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -481,11 +481,13 @@ InferenceRequest::Release(
481481
// object.
482482
auto& release_callbacks = request->release_callbacks_;
483483
for (int i = release_callbacks.size() - 1; i >= 0; --i) {
484-
auto [release_fn, is_internal] = release_callbacks[i];
485-
if (is_internal) {
484+
// Callbacks must be invoked before erasing
485+
RETURN_IF_ERROR(release_callbacks[i].first(request, release_flags));
486+
if (release_callbacks[i].second) {
487+
// Erase internal callbacks to avoid duplicate callbacks in case of
488+
// reusing the InferenceRequest object
486489
release_callbacks.erase(release_callbacks.begin() + i);
487490
}
488-
release_fn(request, release_flags);
489491
if (request == nullptr) {
490492
return Status::Success;
491493
}

src/sequence_batch_scheduler/sequence_utils.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
// Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
//
33
// Redistribution and use in source and binary forms, with or without
44
// modification, are permitted provided that the following conditions
@@ -93,7 +93,10 @@ class IterativeSequencer : public Sequencer {
9393
std::unique_ptr<InferenceRequest>& irequest,
9494
InferenceRequest::InternalReleaseFn&& callback) override
9595
{
96-
if (irequest->Flags() & TRITONSERVER_REQUEST_FLAG_SEQUENCE_START) {
96+
// Internal release callbacks are removed after getting invoked in
97+
// InferenceRequest::Release. Make sure internal release callback is added
98+
// for each iterative sequence request.
99+
if (!(irequest->Flags() & TRITONSERVER_REQUEST_FLAG_SEQUENCE_END)) {
97100
irequest->AddInternalReleaseCallback(std::move(callback));
98101
}
99102
}

0 commit comments

Comments
 (0)