@@ -59,7 +59,7 @@ DynamicBatchScheduler::DynamicBatchScheduler(
     TritonModel* model, TritonModelInstance* model_instance,
     const bool dynamic_batching_enabled, const int32_t max_batch_size,
     const std::unordered_map<std::string, bool>& enforce_equal_shape_tensors,
-    const bool preserve_ordering, const bool response_cache_enable,
+    const bool preserve_ordering,
     const std::set<int32_t>& preferred_batch_sizes,
     const uint64_t max_queue_delay_microseconds,
     const inference::ModelQueuePolicy& default_queue_policy,
@@ -79,16 +79,8 @@ DynamicBatchScheduler::DynamicBatchScheduler(
   rate_limiter_ = model_->Server()->GetRateLimiter();
   // Both the server and model config should specify
   // caching enabled for model to utilize response cache.
-  response_cache_enabled_ =
-      response_cache_enable && model_->Server()->ResponseCacheEnabled();
-#ifdef TRITON_ENABLE_METRICS
-  // Initialize metric reporter for cache statistics if cache enabled
-  if (response_cache_enabled_) {
-    MetricModelReporter::Create(
-        model_name_, model_->Version(), METRIC_REPORTER_ID_RESPONSE_CACHE,
-        response_cache_enabled_, model_->Config().metric_tags(), &reporter_);
-  }
-#endif  // TRITON_ENABLE_METRICS
+  response_cache_enabled_ = model_->ResponseCacheEnabled() &&
+                            model_->Server()->ResponseCacheEnabled();
   max_preferred_batch_size_ = 0;
   for (const auto size : preferred_batch_sizes_) {
     max_preferred_batch_size_ =
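The removed block above created the cache MetricModelReporter per scheduler. A plausible sketch of where that setup now lives, assuming it was consolidated into TritonModel alongside the new ResponseCacheEnabled() accessor; this diff only implies these members, so the member names (config_, version_, server_, reporter_) and the creation site are assumptions, not confirmed by the change:

    // Hypothetical TritonModel initialization: create the cache metric
    // reporter once per model instead of once per scheduler.
    #ifdef TRITON_ENABLE_METRICS
      if (ResponseCacheEnabled() && server_->ResponseCacheEnabled()) {
        MetricModelReporter::Create(
            config_.name(), version_, METRIC_REPORTER_ID_RESPONSE_CACHE,
            true /* response_cache_enabled */, config_.metric_tags(),
            &reporter_);
      }
    #endif  // TRITON_ENABLE_METRICS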
@@ -108,7 +100,7 @@ DynamicBatchScheduler::Create(
     TritonModel* model, TritonModelInstance* model_instance, const int nice,
     const bool dynamic_batching_enabled, const int32_t max_batch_size,
     const std::unordered_map<std::string, bool>& enforce_equal_shape_tensors,
-    const bool preserve_ordering, const bool response_cache_enable,
+    const bool preserve_ordering,
     const std::set<int32_t>& preferred_batch_sizes,
     const uint64_t max_queue_delay_microseconds,
     std::unique_ptr<Scheduler>* scheduler)
@@ -122,8 +114,7 @@ DynamicBatchScheduler::Create(
 
   return Create(
       model, model_instance, nice, dynamic_batching_enabled, max_batch_size,
-      enforce_equal_shape_tensors, batcher_config, response_cache_enable,
-      scheduler);
+      enforce_equal_shape_tensors, batcher_config, scheduler);
 }
 
 Status
@@ -132,7 +123,7 @@ DynamicBatchScheduler::Create(
     const bool dynamic_batching_enabled, const int32_t max_batch_size,
     const std::unordered_map<std::string, bool>& enforce_equal_shape_tensors,
     const inference::ModelDynamicBatching& batcher_config,
-    const bool response_cache_enable, std::unique_ptr<Scheduler>* scheduler)
+    std::unique_ptr<Scheduler>* scheduler)
 {
   std::set<int32_t> preferred_batch_sizes;
   for (const auto size : batcher_config.preferred_batch_size()) {
@@ -142,8 +133,7 @@ DynamicBatchScheduler::Create(
   DynamicBatchScheduler* dyna_sched = new DynamicBatchScheduler(
       model, model_instance, dynamic_batching_enabled, max_batch_size,
       enforce_equal_shape_tensors, batcher_config.preserve_ordering(),
-      response_cache_enable, preferred_batch_sizes,
-      batcher_config.max_queue_delay_microseconds(),
+      preferred_batch_sizes, batcher_config.max_queue_delay_microseconds(),
       batcher_config.default_queue_policy(), batcher_config.priority_levels(),
       batcher_config.priority_queue_policy());
   std::unique_ptr<DynamicBatchScheduler> sched(dyna_sched);
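For illustration, a hypothetical call site after the signature change; the argument values are invented, but the parameter order matches the trimmed Create above:

    // The response_cache_enable flag is gone from the factory; cache
    // enablement is now derived from the model inside the constructor.
    std::unique_ptr<Scheduler> scheduler;
    RETURN_IF_ERROR(DynamicBatchScheduler::Create(
        model, nullptr /* model_instance */, 0 /* nice */,
        true /* dynamic_batching_enabled */, 8 /* max_batch_size */,
        {} /* enforce_equal_shape_tensors */,
        model->Config().dynamic_batching(), &scheduler));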
@@ -681,7 +671,7 @@ DynamicBatchScheduler::DelegateResponse(
   // Use model_ to update stats directly because request object can be
   // released by the backend before getting to this callback.
   model_->MutableStatsAggregator()->UpdateSuccessCacheMiss(
-      reporter_.get(), cache_miss_ns);
+      model_->MetricReporter().get(), cache_miss_ns);
 #endif  // TRITON_ENABLE_STATS
   if (!status.IsOk()) {
     LOG_ERROR << "Failed to insert key [" << key
@@ -736,7 +726,7 @@ DynamicBatchScheduler::CacheLookUp(
 #ifdef TRITON_ENABLE_STATS
     // Update model metrics/stats on cache hits
     // Backends will update metrics as normal on cache misses
-    request->ReportStatisticsCacheHit(reporter_.get());
+    request->ReportStatisticsCacheHit(model_->MetricReporter().get());
 #endif  // TRITON_ENABLE_STATS
   }
 }
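Both stats paths, the cache-miss update in DelegateResponse and the cache-hit report in CacheLookUp, now fetch the reporter from the model instead of from a scheduler-owned reporter_ member. The only shape this diff actually pins down is that MetricReporter() returns something supporting .get(); one assumed form:

    // Assumed accessor on TritonModel: a shared_ptr member lets the
    // scheduler's hit and miss paths share a single reporter with the model.
    const std::shared_ptr<MetricModelReporter>& MetricReporter() const
    {
      return reporter_;
    }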