
Commit 73d374e

Merge branch 'main' of https://github.com/triton-inference-server/core into yinggeh-DLIS-6657-client-input-byte-size-check
2 parents b336ecc + d2abb8b

19 files changed: +253 −74 lines

include/triton/core/tritonbackend.h

Lines changed: 3 additions & 3 deletions
@@ -1722,9 +1722,9 @@ TRITONBACKEND_BackendAttributeSetParallelModelInstanceLoading(
 ///
 /// \param batcher User-defined placeholder for backend to store and
 /// retrieve information about the batching strategy for this
-/// model.RITONBACKEND_ISPEC return a TRITONSERVER_Error indicating success or
-/// failure. \param model The backend model for which Triton is forming a batch.
-/// \return a TRITONSERVER_Error indicating success or failure.
+/// model. Returns a TRITONSERVER_Error indicating success
+/// or failure. \param model The backend model for which Triton is forming a
+/// batch. \return a TRITONSERVER_Error indicating success or failure.
 TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelBatcherInitialize(
     TRITONBACKEND_Batcher** batcher, TRITONBACKEND_Model* model);
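For context, TRITONBACKEND_ModelBatcherInitialize is the entry point a backend implements to opt into custom batching. A minimal sketch of an implementation, assuming a hypothetical MyBatchState struct (the real state type is backend-specific; Triton treats TRITONBACKEND_Batcher as an opaque handle owned by the backend):

#include "triton/core/tritonbackend.h"

// Hypothetical backend-defined batching state.
struct MyBatchState {
  size_t preferred_batch_size = 8;
};

extern "C" TRITONSERVER_Error*
TRITONBACKEND_ModelBatcherInitialize(
    TRITONBACKEND_Batcher** batcher, TRITONBACKEND_Model* model)
{
  // Stash strategy state in the opaque handle; Triton passes it back to the
  // backend's other custom-batching entry points.
  *batcher = reinterpret_cast<TRITONBACKEND_Batcher*>(new MyBatchState());
  return nullptr;  // nullptr indicates success
}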

include/triton/core/tritonserver.h

Lines changed: 15 additions & 3 deletions
@@ -91,7 +91,7 @@ struct TRITONSERVER_MetricFamily;
 /// }
 ///
 #define TRITONSERVER_API_VERSION_MAJOR 1
-#define TRITONSERVER_API_VERSION_MINOR 32
+#define TRITONSERVER_API_VERSION_MINOR 33

 /// Get the TRITONBACKEND API version supported by the Triton shared
 /// library. This value can be compared against the
@@ -732,7 +732,8 @@ typedef enum tritonserver_traceactivity_enum {
   TRITONSERVER_TRACE_REQUEST_END = 6,
   TRITONSERVER_TRACE_TENSOR_QUEUE_INPUT = 7,
   TRITONSERVER_TRACE_TENSOR_BACKEND_INPUT = 8,
-  TRITONSERVER_TRACE_TENSOR_BACKEND_OUTPUT = 9
+  TRITONSERVER_TRACE_TENSOR_BACKEND_OUTPUT = 9,
+  TRITONSERVER_TRACE_CUSTOM_ACTIVITY = 10
 } TRITONSERVER_InferenceTraceActivity;

 /// Get the string representation of a trace activity. The returned
@@ -838,6 +839,18 @@ TRITONSERVER_InferenceTraceTensorNew(
     TRITONSERVER_InferenceTraceTensorActivityFn_t tensor_activity_fn,
     TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void* trace_userp);

+/// Report a trace activity. All the traces reported using this API will be
+/// using TRITONSERVER_TRACE_CUSTOM_ACTIVITY type.
+///
+/// \param trace The trace object.
+/// \param timestamp The timestamp associated with the trace activity.
+/// \param name The trace activity name.
+/// \return a TRITONSERVER_Error indicating success or failure.
+TRITONSERVER_DECLSPEC TRITONSERVER_Error*
+TRITONSERVER_InferenceTraceReportActivity(
+    TRITONSERVER_InferenceTrace* trace, uint64_t timestamp,
+    const char* activity_name);
+
 /// Delete a trace object.
 ///
 /// \param trace The trace object.
@@ -921,7 +934,6 @@ TRITONSERVER_DECLSPEC struct TRITONSERVER_Error*
 TRITONSERVER_InferenceTraceSetContext(
     struct TRITONSERVER_InferenceTrace* trace, const char* trace_context);

-
 /// Get TRITONSERVER_InferenceTrace context.
 ///
 /// \param trace The trace.
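The new TRITONSERVER_InferenceTraceReportActivity entry point lets in-process code attach arbitrary named timestamps to an existing trace, all surfaced with the TRITONSERVER_TRACE_CUSTOM_ACTIVITY type. A minimal caller sketch, assuming a trace object obtained elsewhere and steady-clock nanoseconds as the timestamp convention:

#include <chrono>
#include <cstdint>
#include "triton/core/tritonserver.h"

// Report a named custom activity on an existing trace object.
void ReportCustomMark(TRITONSERVER_InferenceTrace* trace, const char* name)
{
  const uint64_t now_ns =
      std::chrono::duration_cast<std::chrono::nanoseconds>(
          std::chrono::steady_clock::now().time_since_epoch())
          .count();
  TRITONSERVER_Error* err =
      TRITONSERVER_InferenceTraceReportActivity(trace, now_ns, name);
  if (err != nullptr) {
    // Inspect/log the failure, then free the error object.
    TRITONSERVER_ErrorDelete(err);
  }
}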

python/test/test_api.py

Lines changed: 0 additions & 5 deletions
@@ -345,11 +345,6 @@ def test_ready(self):
         server = tritonserver.Server(self._server_options).start()
         self.assertTrue(server.ready())

-    @pytest.mark.xfail(
-        tritonserver.__version__ <= "2.48.0",
-        reason="Known issue on stop: Exit timeout expired. Exiting immediately",
-        raises=tritonserver.InternalError,
-    )
     def test_stop(self):
         server = tritonserver.Server(self._server_options).start(wait_until_ready=True)

python/tritonserver/_c/tritonserver_pybind.cc

Lines changed: 13 additions & 2 deletions
@@ -1,4 +1,4 @@
-// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -1434,7 +1434,18 @@ class PyServer : public PyWrapper<struct TRITONSERVER_Server> {
     owned_ = true;
   }

-  void Stop() const { ThrowIfError(TRITONSERVER_ServerStop(triton_object_)); }
+  void Stop() const
+  {
+    // ServerStop is blocking for the duration of the server exit timeout, so
+    // ensure to release the GIL. This can allow request release callbacks
+    // to be interleaved while server is waiting for live requests/models
+    // to complete. Without releasing GIL, this function may acquire the GIL
+    // first and block the Triton request from being released/freed, thus
+    // blocking the server's shutdown in a circular manner thinking a model is
+    // still alive.
+    py::gil_scoped_release release;
+    ThrowIfError(TRITONSERVER_ServerStop(triton_object_));
+  }

   void RegisterModelRepository(
       const std::string& repository_path,
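The Stop() change follows the general pybind11 rule that a binding which blocks waiting on work that may re-enter Python (here, request release callbacks during shutdown) must drop the GIL first or it can deadlock. A generic sketch of the pattern, with BlockingShutdown() standing in as a hypothetical blocking native call:

#include <pybind11/pybind11.h>

namespace py = pybind11;

extern "C" void BlockingShutdown();  // hypothetical blocking native call

void WaitForShutdown()
{
  // Drop the GIL for the duration of the blocking call so callbacks running
  // on other threads can acquire it and make progress.
  py::gil_scoped_release release;
  BlockingShutdown();
}  // GIL re-acquired when 'release' goes out of scope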

src/backend_model_instance.cc

Lines changed: 13 additions & 2 deletions
@@ -36,6 +36,7 @@
 #include "backend_config.h"
 #include "backend_model.h"
 #include "cuda_utils.h"
+#include "infer_stats.h"
 #include "metrics.h"
 #include "model_config.pb.h"
 #include "numa_utils.h"
@@ -558,7 +559,8 @@ TritonModelInstance::PrepareRequestsOrRespond(
   // If any errors occurred, respond with error for each request.
   if (!status.IsOk()) {
     for (auto& r : requests) {
-      InferenceRequest::RespondIfError(r, status, true /* release_requests */);
+      InferenceRequest::RespondIfError(
+          r, status, true /* release_requests */, FailureReason::OTHER);
     }
     // Log a single error for batch of requests for better visibility
     LOG_STATUS_ERROR(status, "Requests failed pre-execution checks");
@@ -685,7 +687,16 @@ TritonModelInstance::Execute(
   for (TRITONBACKEND_Request* tr : triton_requests) {
     std::unique_ptr<InferenceRequest> ur(
         reinterpret_cast<InferenceRequest*>(tr));
-    InferenceRequest::RespondIfError(ur, status, true /* release_requests */);
+    // NOTE: If a backend both returns an error in
+    // TRITONBACKEND_ModelInstanceExecute and reports an error with
+    // TRITONBACKEND_ModelInstanceReportStatistics, this can result in double
+    // counting of the failure metric for the same request. However, it is
+    // currently not expected for this to be a common case, as the return
+    // value of TRITONBACKEND_ModelInstanceExecute is used to express
+    // ownership of the request rather than success of an inference request.
+    // See tritonbackend.h for more details on this.
+    InferenceRequest::RespondIfError(
+        ur, status, true /* release_requests */, FailureReason::BACKEND);
   }

   TRITONSERVER_ErrorDelete(err);

src/dynamic_batch_scheduler.cc

Lines changed: 7 additions & 4 deletions
@@ -50,11 +50,12 @@ IsStaleState(Payload::State payload_state)
 void
 FinishSkippedRequests(
     std::vector<std::deque<std::unique_ptr<InferenceRequest>>>&& requests,
-    const Status& response_status)
+    const Status& response_status, FailureReason reason)
 {
   for (auto& queue : requests) {
     for (auto& request : queue) {
-      InferenceRequest::RespondIfError(request, response_status, true);
+      InferenceRequest::RespondIfError(
+          request, response_status, true /* release_requests */, reason);
     }
   }
 }
@@ -69,8 +70,10 @@ FinishRejectedCancelledRequests(
   const static Status rejected_status =
       Status(Status::Code::UNAVAILABLE, "Request timeout expired");
   const static Status cancelled_status = Status(Status::Code::CANCELLED);
-  FinishSkippedRequests(std::move(rejected_requests), rejected_status);
-  FinishSkippedRequests(std::move(cancelled_requests), cancelled_status);
+  FinishSkippedRequests(
+      std::move(rejected_requests), rejected_status, FailureReason::REJECTED);
+  FinishSkippedRequests(
+      std::move(cancelled_requests), cancelled_status, FailureReason::CANCELED);
 }

 DynamicBatchScheduler::DynamicBatchScheduler(

src/ensemble_scheduler/ensemble_scheduler.cc

Lines changed: 19 additions & 14 deletions
@@ -81,23 +81,26 @@ class RequestTracker {
     std::lock_guard<std::mutex> lk(mtx_);
     inflight_request_counter_--;
     if (inflight_request_counter_ == 0) {
+      if (request_ != nullptr) {
 #ifdef TRITON_ENABLE_STATS
-      const auto& infer_stats = context_stats_aggregator_.ImmutableInferStats();
-      request_->ReportStatisticsWithDuration(
-          metric_reporter_, status_.IsOk(), compute_start_ns_,
-          infer_stats.compute_input_duration_ns_,
-          infer_stats.compute_infer_duration_ns_,
-          infer_stats.compute_output_duration_ns_);
-      if (status_.IsOk()) {
-        stats_aggregator_->UpdateInferBatchStatsWithDuration(
-            metric_reporter_, std::max(1U, request_->BatchSize()),
+        const auto& infer_stats =
+            context_stats_aggregator_.ImmutableInferStats();
+        request_->ReportStatisticsWithDuration(
+            metric_reporter_, status_.IsOk(), compute_start_ns_,
             infer_stats.compute_input_duration_ns_,
             infer_stats.compute_infer_duration_ns_,
             infer_stats.compute_output_duration_ns_);
-      }
+        if (status_.IsOk()) {
+          stats_aggregator_->UpdateInferBatchStatsWithDuration(
+              metric_reporter_, std::max(1U, request_->BatchSize()),
+              infer_stats.compute_input_duration_ns_,
+              infer_stats.compute_infer_duration_ns_,
+              infer_stats.compute_output_duration_ns_);
+        }
 #endif
-      InferenceRequest::Release(
-          std::move(request_), TRITONSERVER_REQUEST_RELEASE_ALL);
+        InferenceRequest::Release(
+            std::move(request_), TRITONSERVER_REQUEST_RELEASE_ALL);
+      }
     }
     return (inflight_request_counter_ == 0);
   }
@@ -1136,7 +1139,8 @@ EnsembleContext::FinishEnsemble(std::unique_ptr<InferenceResponse>&& response)
         "more "
         "ensemble steps can be made");
     InferenceRequest::RespondIfError(
-        request_tracker_->Request(), ensemble_status_);
+        request_tracker_->Request(), ensemble_status_,
+        false /* release_requests */, FailureReason::OTHER);
   } else {
     request_tracker_->Request()->ResponseFactory()->SendFlags(
         TRITONSERVER_RESPONSE_COMPLETE_FINAL);
@@ -1149,7 +1153,8 @@ EnsembleContext::FinishEnsemble(std::unique_ptr<InferenceResponse>&& response)
           ensemble_status_);
     } else {
       InferenceRequest::RespondIfError(
-          request_tracker_->Request(), ensemble_status_);
+          request_tracker_->Request(), ensemble_status_,
+          false /* release_requests */, FailureReason::OTHER);
     }
   }

src/infer_request.cc

Lines changed: 43 additions & 21 deletions
@@ -421,10 +421,25 @@ InferenceRequest::Run(std::unique_ptr<InferenceRequest>& request)
   return status;
 }

+FailureReason
+stringToFailureReason(const std::string& error_type)
+{
+  if (error_type == "REJECTED") {
+    return FailureReason::REJECTED;
+  }
+  if (error_type == "CANCELED") {
+    return FailureReason::CANCELED;
+  }
+  if (error_type == "BACKEND") {
+    return FailureReason::BACKEND;
+  }
+  return FailureReason::OTHER;
+}
+
 void
 InferenceRequest::RespondIfError(
     std::unique_ptr<InferenceRequest>& request, const Status& status,
-    const bool release_request)
+    const bool release_request, FailureReason reason)
 {
   if (status.IsOk()) {
     return;
@@ -442,7 +457,10 @@ InferenceRequest::RespondIfError(
       InferenceResponse::SendWithStatus(
           std::move(response), TRITONSERVER_RESPONSE_COMPLETE_FINAL, status),
       (request->LogRequest() + "failed to send error response").c_str());
-
+#ifdef TRITON_ENABLE_STATS
+  request->ReportErrorStatistics(
+      request->model_raw_->MetricReporter().get(), reason);
+#endif
   // If releasing the request then invoke the release callback which
   // gives ownership to the callback. So can't access 'request' after
   // this point.
@@ -452,20 +470,6 @@ InferenceRequest::RespondIfError(
   }
 }

-void
-InferenceRequest::RespondIfError(
-    std::vector<std::unique_ptr<InferenceRequest>>& requests,
-    const Status& status, const bool release_requests)
-{
-  if (status.IsOk()) {
-    return;
-  }
-
-  for (auto& request : requests) {
-    RespondIfError(request, status, release_requests);
-  }
-}
-
 Status
 InferenceRequest::Release(
     std::unique_ptr<InferenceRequest>&& request, const uint32_t release_flags)
@@ -1389,6 +1393,21 @@ InferenceRequest::ValidateBytesInputs(
 }

 #ifdef TRITON_ENABLE_STATS
+
+void
+InferenceRequest::ReportErrorStatistics(
+    MetricModelReporter* metric_reporter, FailureReason reason)
+{
+  INFER_STATS_DECL_TIMESTAMP(request_end_ns);
+  model_raw_->MutableStatsAggregator()->UpdateFailure(
+      metric_reporter, request_start_ns_, request_end_ns, reason);
+  if (secondary_stats_aggregator_ != nullptr) {
+    secondary_stats_aggregator_->UpdateFailure(
+        nullptr /* metric_reporter */, request_start_ns_, request_end_ns,
+        reason);
+  }
+}
+
 void
 InferenceRequest::ReportStatistics(
     MetricModelReporter* metric_reporter, bool success,
@@ -1425,10 +1444,12 @@ InferenceRequest::ReportStatistics(
     }
   } else {
     model_raw_->MutableStatsAggregator()->UpdateFailure(
-        metric_reporter, request_start_ns_, request_end_ns);
+        metric_reporter, request_start_ns_, request_end_ns,
+        FailureReason::BACKEND);
     if (secondary_stats_aggregator_ != nullptr) {
       secondary_stats_aggregator_->UpdateFailure(
-          nullptr /* metric_reporter */, request_start_ns_, request_end_ns);
+          nullptr /* metric_reporter */, request_start_ns_, request_end_ns,
+          FailureReason::BACKEND);
     }
   }
 }
@@ -1461,10 +1482,12 @@ InferenceRequest::ReportStatisticsWithDuration(
     }
   } else {
     model_raw_->MutableStatsAggregator()->UpdateFailure(
-        metric_reporter, request_start_ns_, request_end_ns);
+        metric_reporter, request_start_ns_, request_end_ns,
+        FailureReason::OTHER);
     if (secondary_stats_aggregator_ != nullptr) {
       secondary_stats_aggregator_->UpdateFailure(
-          nullptr /* metric_reporter */, request_start_ns_, request_end_ns);
+          nullptr /* metric_reporter */, request_start_ns_, request_end_ns,
+          FailureReason::OTHER);
     }
   }
 }
@@ -1868,5 +1891,4 @@ operator!=(
 {
   return !(lhs == rhs);
 }
-
 }}  // namespace triton::core
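The commit threads a FailureReason through every failure-path call to UpdateFailure so failures can be counted per cause (REJECTED, CANCELED, BACKEND, OTHER), and stringToFailureReason parses a reason back out of a string, presumably for errors crossing the C API boundary. A hypothetical reverse helper, not part of this commit, illustrating the mapping (the enum is assumed to mirror the one declared in infer_stats.h):

#include <string>

// Assumed to mirror the FailureReason enum declared in infer_stats.h.
enum class FailureReason { REJECTED, CANCELED, BACKEND, OTHER };

// Hypothetical inverse of stringToFailureReason, e.g. for emitting a
// per-reason label on a failure counter.
std::string FailureReasonLabel(FailureReason reason)
{
  switch (reason) {
    case FailureReason::REJECTED:
      return "REJECTED";
    case FailureReason::CANCELED:
      return "CANCELED";
    case FailureReason::BACKEND:
      return "BACKEND";
    default:
      return "OTHER";
  }
}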

src/infer_request.h

Lines changed: 14 additions & 2 deletions
@@ -590,7 +590,8 @@ class InferenceRequest {
   // 'release_request' is true 'request' is returned as nullptr.
   static void RespondIfError(
       std::unique_ptr<InferenceRequest>& request, const Status& status,
-      const bool release_request = false);
+      const bool release_request = false,
+      FailureReason reason = FailureReason::OTHER);

   // Send an error response to a set of 'requests'. If 'status' is
   // Success then no responses are sent and the requests are not
@@ -603,7 +604,8 @@ class InferenceRequest {
   // returned with all nullptrs.
   static void RespondIfError(
       std::vector<std::unique_ptr<InferenceRequest>>& requests,
-      const Status& status, const bool release_requests = false);
+      const Status& status, const bool release_requests = false,
+      FailureReason reason = FailureReason::OTHER);

   // Release the request. Call the release callback and transfer
   // ownership of the request to the callback. On return 'request' is
@@ -673,6 +675,16 @@ class InferenceRequest {
       const uint64_t compute_start_ns, const uint64_t compute_input_end_ns,
       const uint64_t compute_output_start_ns, const uint64_t compute_end_ns);

+  // Report the error statistics to stats collectors associated with the
+  // request.
+  // FIXME: A separate function may not be necessary here, but is being used
+  // cautiously in case of unforeseen issues such as possibly capturing a trace
+  // twice. This should be revisited and better tested to see if the
+  // ReportStatistics function can be used as-is for the newly captured failure
+  // cases.
+  void ReportErrorStatistics(
+      MetricModelReporter* metric_reporter, FailureReason reason);
+
   // Report the statistics to stats collectors associated with the request.
   // Duration and timestamps provide two granularities for stats collectors.
   void ReportStatisticsWithDuration(
