Merge branch 'triton-inference-server:main' into main

CGranger-sorenson · web-flow · commit ad5f48ce616d · 2024-04-08T20:28:26.000-06:00
diff --git a/include/triton/core/tritonserver.h b/include/triton/core/tritonserver.h
@@ -91,7 +91,7 @@ struct TRITONSERVER_MetricFamily;
 ///   }
 ///
 #define TRITONSERVER_API_VERSION_MAJOR 1
-#define TRITONSERVER_API_VERSION_MINOR 29
+#define TRITONSERVER_API_VERSION_MINOR 30
 
 /// Get the TRITONBACKEND API version supported by the Triton shared
 /// library. This value can be compared against the
@@ -2258,6 +2258,17 @@ TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerDelete(
 TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerStop(
     struct TRITONSERVER_Server* server);
 
+/// Set the exit timeout on the server object. This value overrides the value
+/// initially set through server options and provides a mechanism to update the
+/// exit timeout while the server is running.
+///
+/// \param server The inference server object.
+/// \param timeout The exit timeout, in seconds.
+/// \return a TRITONSERVER_Error indicating success or failure.
+TRITONSERVER_DECLSPEC struct TRITONSERVER_Error*
+TRITONSERVER_ServerSetExitTimeout(
+    struct TRITONSERVER_Server* server, unsigned int timeout);
+
 /// Register a new model repository. Not available in polling mode.
 ///
 /// \param server The inference server object.
diff --git a/src/sequence_batch_scheduler/sequence_batch_scheduler.cc b/src/sequence_batch_scheduler/sequence_batch_scheduler.cc
@@ -711,6 +711,11 @@ SequenceBatchScheduler::Enqueue(std::unique_ptr<InferenceRequest>& irequest)
   auto sb_itr = sequence_to_batcherseqslot_map_.find(correlation_id);
   auto bl_itr = sequence_to_backlog_map_.find(correlation_id);
 
+  sequencer_->AddReleaseCallback(
+      irequest,
+      [this](std::unique_ptr<InferenceRequest>& request, const uint32_t flags)
+          -> Status { return sequencer_->RescheduleRequest(request, flags); });
+
   // If this request is not starting a new sequence its correlation ID
   // should already be known with a target in either a sequence slot
   // or in the backlog. If it doesn't then the sequence wasn't started
@@ -850,12 +855,6 @@ SequenceBatchScheduler::Enqueue(std::unique_ptr<InferenceRequest>& irequest)
   LOG_VERBOSE(1) << "Enqueuing CORRID " << correlation_id << " into batcher "
                  << model_instance->Name() << ", sequence slot " << seq_slot
                  << ": " << irequest->ModelName();
-
-  sequencer_->AddReleaseCallback(
-      irequest,
-      [this](std::unique_ptr<InferenceRequest>& request, const uint32_t flags)
-          -> Status { return sequencer_->RescheduleRequest(request, flags); });
-
   batchers_[model_instance]->Enqueue(seq_slot, correlation_id, irequest);
   return Status::Success;
 }
diff --git a/src/tritonserver.cc b/src/tritonserver.cc
@@ -2572,6 +2572,17 @@ TRITONSERVER_ServerStop(TRITONSERVER_Server* server)
   return nullptr;  // Success
 }
 
+TRITONAPI_DECLSPEC TRITONSERVER_Error*
+TRITONSERVER_ServerSetExitTimeout(
+    TRITONSERVER_Server* server, unsigned int timeout)
+{
+  tc::InferenceServer* lserver = reinterpret_cast<tc::InferenceServer*>(server);
+  if (lserver != nullptr) {
+    lserver->SetExitTimeoutSeconds(timeout);
+  }
+  return nullptr;  // Success
+}
+
 TRITONSERVER_DECLSPEC TRITONSERVER_Error*
 TRITONSERVER_ServerRegisterModelRepository(
     TRITONSERVER_Server* server, const char* repository_path,
diff --git a/src/tritonserver_stub.cc b/src/tritonserver_stub.cc
@@ -559,6 +559,10 @@ TRITONSERVER_ServerStop()
 {
 }
 TRITONAPI_DECLSPEC void
+TRITONSERVER_ServerSetExitTimeout()
+{
+}
+TRITONAPI_DECLSPEC void
 TRITONSERVER_ServerPollModelRepository()
 {
 }

Original file line number	Diff line number	Diff line change
`@@ -559,6 +559,10 @@ TRITONSERVER_ServerStop()`
`559`	`559`	`{`
`560`	`560`	`}`
`561`	`561`	`TRITONAPI_DECLSPEC void`
	`562`	`+TRITONSERVER_ServerSetExitTimeout()`
	`563`	`+{`
	`564`	`+}`
	`565`	`+TRITONAPI_DECLSPEC void`
`562`	`566`	`TRITONSERVER_ServerPollModelRepository()`
`563`	`567`	`{`
`564`	`568`	`}`