
Commit 2585eb9

Merge pull request #1 from triton-inference-server/main
Merge main from tif
2 parents 2f848ee + 0089bb7 commit 2585eb9

27 files changed (+405, -144 lines)

include/triton/core/tritonserver.h
Lines changed: 11 additions & 1 deletion

@@ -91,7 +91,7 @@ struct TRITONSERVER_MetricFamily;
 /// }
 ///
 #define TRITONSERVER_API_VERSION_MAJOR 1
-#define TRITONSERVER_API_VERSION_MINOR 30
+#define TRITONSERVER_API_VERSION_MINOR 31
 
 /// Get the TRITONBACKEND API version supported by the Triton shared
 /// library. This value can be compared against the
@@ -1828,6 +1828,16 @@ TRITONSERVER_DECLSPEC struct TRITONSERVER_Error*
 TRITONSERVER_ServerOptionsSetStrictModelConfig(
     struct TRITONSERVER_ServerOptions* options, bool strict);
 
+/// Set the custom model configuration name to load for all models.
+/// Fall back to default config file if empty.
+///
+/// \param options The server options object.
+/// \param config_name The name of the config file to load for all models.
+/// \return a TRITONSERVER_Error indicating success or failure.
+TRITONSERVER_DECLSPEC struct TRITONSERVER_Error*
+TRITONSERVER_ServerOptionsSetModelConfigName(
+    struct TRITONSERVER_ServerOptions* options, const char* model_config_name);
+
 /// Set the rate limit mode in a server options.
 ///
 /// TRITONSERVER_RATE_LIMIT_EXEC_COUNT: The rate limiting prioritizes the
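
For context, a minimal sketch of how the new option might be exercised through the in-process C API. The repository path and the config name "custom_config" are placeholders, and error handling is reduced to an abort-on-error macro; treat this as an illustration of the call sequence, not code from this commit.

#include <cstdio>
#include <cstdlib>

#include "triton/core/tritonserver.h"

// Abort on any TRITONSERVER_Error returned by the C API.
#define CHECK_TRITON(X)                                                       \
  do {                                                                        \
    TRITONSERVER_Error* err__ = (X);                                          \
    if (err__ != nullptr) {                                                   \
      std::fprintf(stderr, "error: %s\n", TRITONSERVER_ErrorMessage(err__));  \
      TRITONSERVER_ErrorDelete(err__);                                        \
      std::exit(1);                                                           \
    }                                                                         \
  } while (false)

int
main()
{
  TRITONSERVER_ServerOptions* options = nullptr;
  CHECK_TRITON(TRITONSERVER_ServerOptionsNew(&options));

  // Placeholder repository path for illustration only.
  CHECK_TRITON(TRITONSERVER_ServerOptionsSetModelRepositoryPath(
      options, "/workspace/models"));

  // New in API version 1.31: load the custom config named "custom_config"
  // for every model; an empty string falls back to the default config file.
  CHECK_TRITON(
      TRITONSERVER_ServerOptionsSetModelConfigName(options, "custom_config"));

  TRITONSERVER_Server* server = nullptr;
  CHECK_TRITON(TRITONSERVER_ServerNew(&server, options));
  CHECK_TRITON(TRITONSERVER_ServerOptionsDelete(options));

  // ... serve inference requests, then shut down ...
  CHECK_TRITON(TRITONSERVER_ServerDelete(server));
  return 0;
}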

python/test/test_api.py
Lines changed: 1 addition & 1 deletion

@@ -346,7 +346,7 @@ def test_ready(self):
         self.assertTrue(server.ready())
 
     @pytest.mark.xfail(
-        tritonserver.__version__ <= "2.43.0",
+        tritonserver.__version__ <= "2.46.0",
         reason="Known issue on stop: Exit timeout expired. Exiting immediately",
         raises=tritonserver.InternalError,
     )

python/tritonserver/_api/_server.py
Lines changed: 3 additions & 3 deletions

@@ -137,7 +137,7 @@ class Options:
         List of models to load at startup. Only relevant with ModelControlMode.EXPLICIT.
         See :c:func:`TRITONSERVER_ServerOptionsSetStartupModel`
 
-    strict_model_config : bool, default True
+    strict_model_config : bool, default False
         Enable or disable strict model configuration.
         See :c:func:`TRITONSERVER_ServerOptionsSetStrictModelConfig`
 
@@ -275,7 +275,7 @@ class Options:
     server_id: str = "triton"
     model_control_mode: ModelControlMode = ModelControlMode.NONE
     startup_models: list[str] = field(default_factory=list[str])
-    strict_model_config: bool = True
+    strict_model_config: bool = False
 
     rate_limiter_mode: RateLimitMode = RateLimitMode.OFF
     rate_limiter_resources: list[RateLimiterResource] = field(
@@ -507,7 +507,7 @@ def __init__(
 
        Options(server_id='triton', model_repository='/workspace/models',
        model_control_mode=<TRITONSERVER_ModelControlMode.NONE: 0>,
-       startup_models=[], strict_model_config=True,
+       startup_models=[], strict_model_config=False,
        rate_limiter_mode=<TRITONSERVER_RateLimitMode.OFF: 0>,
        rate_limiter_resources=[], pinned_memory_pool_size=268435456,
        cuda_memory_pool_sizes={}, cache_config={},

src/backend_model.cc
Lines changed: 10 additions & 7 deletions

@@ -61,8 +61,9 @@ TritonModel::Create(
     InferenceServer* server, const std::string& model_path,
     const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map,
     const triton::common::HostPolicyCmdlineConfigMap& host_policy_map,
-    const int64_t version, inference::ModelConfig model_config,
-    const bool is_config_provided, std::unique_ptr<TritonModel>* model)
+    const ModelIdentifier& model_id, const int64_t version,
+    inference::ModelConfig model_config, const bool is_config_provided,
+    std::unique_ptr<TritonModel>* model)
 {
   model->reset();
 
@@ -143,8 +144,8 @@
 
   // Create and initialize the model.
   std::unique_ptr<TritonModel> local_model(new TritonModel(
-      server, localized_model_dir, backend, min_compute_capability, version,
-      model_config, auto_complete_config, backend_cmdline_config_map,
+      server, localized_model_dir, backend, min_compute_capability, model_id,
+      version, model_config, auto_complete_config, backend_cmdline_config_map,
       host_policy_map));
 
   TritonModel* raw_local_model = local_model.get();
@@ -929,12 +930,14 @@ TritonModel::TritonModel(
     InferenceServer* server,
     const std::shared_ptr<LocalizedPath>& localized_model_dir,
     const std::shared_ptr<TritonBackend>& backend,
-    const double min_compute_capability, const int64_t version,
-    const inference::ModelConfig& config, const bool auto_complete_config,
+    const double min_compute_capability, const ModelIdentifier& model_id,
+    const int64_t version, const inference::ModelConfig& config,
+    const bool auto_complete_config,
     const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map,
     const triton::common::HostPolicyCmdlineConfigMap& host_policy_map)
     : Model(
-          min_compute_capability, localized_model_dir->Path(), version, config),
+          min_compute_capability, localized_model_dir->Path(), model_id,
+          version, config),
       server_(server), min_compute_capability_(min_compute_capability),
       auto_complete_config_(auto_complete_config),
       backend_cmdline_config_map_(backend_cmdline_config_map),

src/backend_model.h
Lines changed: 6 additions & 4 deletions

@@ -61,8 +61,9 @@ class TritonModel : public Model {
       InferenceServer* server, const std::string& model_path,
       const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map,
       const triton::common::HostPolicyCmdlineConfigMap& host_policy_map,
-      const int64_t version, inference::ModelConfig model_config,
-      const bool is_config_provided, std::unique_ptr<TritonModel>* model);
+      const ModelIdentifier& model_id, const int64_t version,
+      inference::ModelConfig model_config, const bool is_config_provided,
+      std::unique_ptr<TritonModel>* model);
   ~TritonModel();
 
   // Return path to the localized model directory.
@@ -118,8 +119,9 @@ class TritonModel : public Model {
       InferenceServer* server,
       const std::shared_ptr<LocalizedPath>& localized_model_dir,
       const std::shared_ptr<TritonBackend>& backend,
-      const double min_compute_capability, const int64_t version,
-      const inference::ModelConfig& config, const bool auto_complete_config,
+      const double min_compute_capability, const ModelIdentifier& model_id,
+      const int64_t version, const inference::ModelConfig& config,
+      const bool auto_complete_config,
       const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map,
       const triton::common::HostPolicyCmdlineConfigMap& host_policy_map);
 

src/backend_model_instance.cc
Lines changed: 1 addition & 1 deletion

@@ -190,7 +190,7 @@ TritonModelInstance::TritonModelInstance(
         model_->ResponseCacheEnabled() &&
         model_->Server()->ResponseCacheEnabled();
     MetricModelReporter::Create(
-        model_->Name(), model_->Version(), id, response_cache_enabled,
+        model_->ModelId(), model_->Version(), id, response_cache_enabled,
         model_->Config().metric_tags(), &reporter_);
   }
 #endif // TRITON_ENABLE_METRICS
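
The ModelIdentifier type that this commit threads through TritonModel, EnsembleModel, and MetricModelReporter::Create (which previously received only model_->Name()) is not defined in any of the hunks shown here. A minimal sketch of the idea, assuming the identifier pairs a model namespace with a model name so that same-named models in different namespaces stay distinguishable (the new kMetricsLabelModelNamespace constant below points in that direction):

#include <ostream>
#include <string>

// Hypothetical sketch only: the real triton::core::ModelIdentifier is defined
// outside this diff. The assumption is that it couples a namespace with a
// model name, e.g. for use as metric labels.
struct ModelIdentifier {
  std::string namespace_;
  std::string name_;

  bool operator==(const ModelIdentifier& rhs) const
  {
    return (namespace_ == rhs.namespace_) && (name_ == rhs.name_);
  }
};

// Render as "namespace::name" when a namespace is present, else just "name".
inline std::ostream&
operator<<(std::ostream& os, const ModelIdentifier& id)
{
  if (!id.namespace_.empty()) {
    os << id.namespace_ << "::";
  }
  return os << id.name_;
}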

src/constants.h
Lines changed: 4 additions & 1 deletion

@@ -1,4 +1,4 @@
-// Copyright 2018-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -71,7 +71,10 @@ constexpr char kAutoMixedPrecisionExecutionAccelerator[] =
     "auto_mixed_precision";
 
 constexpr char kModelConfigPbTxt[] = "config.pbtxt";
+constexpr char kPbTxtExtension[] = ".pbtxt";
+constexpr char kModelConfigFolder[] = "configs";
 
+constexpr char kMetricsLabelModelNamespace[] = "namespace";
 constexpr char kMetricsLabelModelName[] = "model";
 constexpr char kMetricsLabelModelVersion[] = "version";
 constexpr char kMetricsLabelGpuUuid[] = "gpu_uuid";
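
The new constants suggest that a custom model configuration lives next to the default config.pbtxt, inside a configs/ folder, as <name>.pbtxt. The lookup logic itself is not part of this section, so the path layout below is an assumption and the helper name is hypothetical.

#include <string>

constexpr char kModelConfigPbTxt[] = "config.pbtxt";
constexpr char kPbTxtExtension[] = ".pbtxt";
constexpr char kModelConfigFolder[] = "configs";

// Hypothetical helper: map the custom config name set via
// TRITONSERVER_ServerOptionsSetModelConfigName to a file under the model
// directory. An empty name falls back to the default config.pbtxt.
std::string
ModelConfigPath(const std::string& model_dir, const std::string& config_name)
{
  if (config_name.empty()) {
    return model_dir + "/" + kModelConfigPbTxt;
  }
  return model_dir + "/" + kModelConfigFolder + "/" + config_name +
         kPbTxtExtension;
}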

src/dynamic_batch_scheduler.cc
Lines changed: 2 additions & 33 deletions

@@ -39,14 +39,6 @@
 
 namespace triton { namespace core {
 
-uint64_t
-CaptureTimeNs()
-{
-  return std::chrono::duration_cast<std::chrono::nanoseconds>(
-             std::chrono::steady_clock::now().time_since_epoch())
-      .count();
-}
-
 bool
 IsStaleState(Payload::State payload_state)
 {
@@ -753,32 +745,9 @@ DynamicBatchScheduler::CacheLookUp(
     std::unique_ptr<InferenceRequest>& request,
     std::unique_ptr<InferenceResponse>& cached_response)
 {
-  Status status;
   auto cache = model_->Server()->CacheManager()->Cache();
-  std::unique_ptr<InferenceResponse> local_response;
-  request->ResponseFactory()->CreateResponse(&local_response);
-  // Hash request into cache key
-  std::string key = "";
-  if (!request->CacheKeyIsSet()) {
-    status = cache->Hash(*request, &key);
-    if (!status.IsOk()) {
-      LOG_ERROR << "Failed to hash request: " << status.Message();
-      return;
-    }
-    request->SetCacheKey(key);
-  } else {
-    key = request->CacheKey();
-  }
-
-  // Lookup and capture timestamps
-  {
-    request->CaptureCacheLookupStartNs();
-    status = cache->Lookup(local_response.get(), key);
-    request->CaptureCacheLookupEndNs();
-  }
-
-  if (status.IsOk() && (local_response != nullptr)) {
-    cached_response = std::move(local_response);
+  bool is_lookup_success = CacheLookUpUtil(request, cached_response, cache);
+  if (is_lookup_success) {
 #ifdef TRITON_ENABLE_STATS
     // Update model metrics/stats on cache hits
     // Backends will update metrics as normal on cache misses
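
The body removed from DynamicBatchScheduler::CacheLookUp now sits behind a shared CacheLookUpUtil helper. Its implementation is not shown in this commit, but from the deleted lines it plausibly looks like the sketch below: hash the request into a cache key if one is not already set, time the lookup, and report whether a cached response was found. The signature and the templated cache parameter are assumptions; the calls themselves are taken from the removed code and rely on the surrounding triton::core types.

// Hypothetical reconstruction of the shared lookup helper; not the actual
// implementation referenced by this commit.
template <typename CachePtr>
bool
CacheLookUpUtil(
    std::unique_ptr<InferenceRequest>& request,
    std::unique_ptr<InferenceResponse>& cached_response, CachePtr cache)
{
  Status status;
  std::unique_ptr<InferenceResponse> local_response;
  request->ResponseFactory()->CreateResponse(&local_response);

  // Hash the request into a cache key unless one was already attached.
  std::string key = "";
  if (!request->CacheKeyIsSet()) {
    status = cache->Hash(*request, &key);
    if (!status.IsOk()) {
      LOG_ERROR << "Failed to hash request: " << status.Message();
      return false;
    }
    request->SetCacheKey(key);
  } else {
    key = request->CacheKey();
  }

  // Look up the key and capture timestamps around the lookup.
  request->CaptureCacheLookupStartNs();
  status = cache->Lookup(local_response.get(), key);
  request->CaptureCacheLookupEndNs();

  // A cache hit hands the cached response back to the caller.
  if (status.IsOk() && (local_response != nullptr)) {
    cached_response = std::move(local_response);
    return true;
  }
  return false;
}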

src/ensemble_scheduler/ensemble_model.cc
Lines changed: 7 additions & 5 deletions

@@ -1,4 +1,4 @@
-// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -37,19 +37,21 @@ namespace triton { namespace core {
 
 Status
 EnsembleModel::Create(
-    InferenceServer* server, const std::string& path, const int64_t version,
+    InferenceServer* server, const std::string& path,
+    const ModelIdentifier& model_id, const int64_t version,
     const inference::ModelConfig& model_config, const bool is_config_provided,
     const double min_compute_capability, std::unique_ptr<Model>* model)
 {
   // Create the ensemble model.
-  std::unique_ptr<EnsembleModel> local_model(
-      new EnsembleModel(min_compute_capability, path, version, model_config));
+  std::unique_ptr<EnsembleModel> local_model(new EnsembleModel(
+      min_compute_capability, path, model_id, version, model_config));
 
   RETURN_IF_ERROR(local_model->Init(is_config_provided));
 
   std::unique_ptr<Scheduler> scheduler;
   RETURN_IF_ERROR(EnsembleScheduler::Create(
-      local_model->MutableStatsAggregator(), server, model_config, &scheduler));
+      local_model->MutableStatsAggregator(), server, local_model->ModelId(),
+      model_config, &scheduler));
   RETURN_IF_ERROR(local_model->SetScheduler(std::move(scheduler)));
 
   LOG_VERBOSE(1) << "ensemble model for " << local_model->Name() << std::endl;

src/ensemble_scheduler/ensemble_model.h
Lines changed: 6 additions & 4 deletions

@@ -1,4 +1,4 @@
-// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -39,7 +39,8 @@ class EnsembleModel : public Model {
   EnsembleModel(EnsembleModel&&) = default;
 
   static Status Create(
-      InferenceServer* server, const std::string& path, const int64_t version,
+      InferenceServer* server, const std::string& path,
+      const ModelIdentifier& model_id, const int64_t version,
       const inference::ModelConfig& model_config, const bool is_config_provided,
       const double min_compute_capability, std::unique_ptr<Model>* model);
 
@@ -48,8 +49,9 @@ class EnsembleModel : public Model {
 
   explicit EnsembleModel(
      const double min_compute_capability, const std::string& model_dir,
-     const int64_t version, const inference::ModelConfig& config)
-     : Model(min_compute_capability, model_dir, version, config)
+     const ModelIdentifier& model_id, const int64_t version,
+     const inference::ModelConfig& config)
+     : Model(min_compute_capability, model_dir, model_id, version, config)
   {
   }
   friend std::ostream& operator<<(std::ostream&, const EnsembleModel&);
