@@ -107,14 +107,6 @@ EnsembleDurations
107107GetTotalEnsembleDurations (const ServerSideStats& stats)
108108{
109109 EnsembleDurations result;
110- // Calculate avg cache hit latency and cache miss latency for ensemble model
111- // in case top level response caching is enabled.
112- const uint64_t ensemble_cache_hit_cnt = stats.cache_hit_count ;
113- const uint64_t ensemble_cache_miss_cnt = stats.cache_miss_count ;
114- result.total_cache_hit_time_avg_us +=
115- AverageDurationInUs (stats.cache_hit_time_ns , ensemble_cache_hit_cnt);
116- result.total_cache_miss_time_avg_us +=
117- AverageDurationInUs (stats.cache_miss_time_ns , ensemble_cache_miss_cnt);
118110 for (const auto & model_stats : stats.composing_models_stat ) {
119111 if (model_stats.second .composing_models_stat .empty ()) {
120112 // Cache hit count covers cache hits, not related to compute times
@@ -246,6 +238,7 @@ ReportServerSideStats(
246238 if (parser->ResponseCacheEnabled ()) {
247239 const uint64_t overhead_avg_us = GetOverheadDuration (
248240 cumm_avg_us, queue_avg_us, combined_cache_compute_avg_us);
241+
249242 std::cout << " (overhead " << overhead_avg_us << " usec + "
250243 << " queue " << queue_avg_us << " usec + "
251244 << " cache hit/miss " << combined_cache_compute_avg_us
@@ -290,18 +283,12 @@ ReportServerSideStats(
290283 const uint64_t overhead_avg_us = GetOverheadDuration (
291284 cumm_avg_us, ensemble_times.total_queue_time_avg_us ,
292285 ensemble_times.total_combined_cache_compute_time_avg_us );
293- // FIXME - Refactor these calculations in case of ensemble top level
294- // response cache is enabled
295- if (!parser->TopLevelResponseCachingEnabled ()) {
296- std::cout << " (overhead " << overhead_avg_us << " usec + "
297- << " queue " << ensemble_times.total_queue_time_avg_us
298- << " usec + "
299- << " cache hit/miss "
300- << ensemble_times.total_combined_cache_compute_time_avg_us
301- << " usec)" << std::endl;
302- } else {
303- std::cout << std::endl;
304- }
286+ std::cout << " (overhead " << overhead_avg_us << " usec + "
287+ << " queue " << ensemble_times.total_queue_time_avg_us
288+ << " usec + "
289+ << " cache hit/miss "
290+ << ensemble_times.total_combined_cache_compute_time_avg_us
291+ << " usec)" << std::endl;
305292 std::cout << ident << ident << " Average Cache Hit Latency: "
306293 << ensemble_times.total_cache_hit_time_avg_us << " usec"
307294 << std::endl;
@@ -1563,21 +1550,6 @@ InferenceProfiler::DetermineStatsModelVersion(
15631550 return cb::Error::Success;
15641551}
15651552
1566- // Only for unit-testing
1567- #ifndef DOCTEST_CONFIG_DISABLE
1568- cb::Error
1569- InferenceProfiler::SetTopLevelResponseCaching (
1570- bool enable_top_level_response_caching)
1571- {
1572- parser_ = std::make_shared<ModelParser>(cb::BackendKind::TRITON);
1573- if (parser_ == nullptr ) {
1574- return cb::Error (" Failed to initialize ModelParser" );
1575- }
1576- parser_->SetTopLevelResponseCaching (enable_top_level_response_caching);
1577- return cb::Error::Success;
1578- }
1579- #endif
1580-
15811553cb::Error
15821554InferenceProfiler::SummarizeServerStats (
15831555 const std::map<cb::ModelIdentifier, cb::ModelStatistics>& start_status,
@@ -1633,20 +1605,8 @@ InferenceProfiler::SummarizeServerStatsHelper(
16331605
16341606 const auto & end_itr = end_status.find (this_id);
16351607 if (end_itr == end_status.end ()) {
1636- // In case of ensemble models, if top level response caching is enabled,
1637- // the composing models statistics are unavailable in case of a cache hit.
1638- // This is due to the scheduler sends cache response and composing models do
1639- // not get executed. It's a valid scenario and shouldn't throw error.
1640- bool stats_not_found_and_invalid =
1641- model_version == -1 && !parser_->TopLevelResponseCachingEnabled ();
1642- if (stats_not_found_and_invalid) {
1643- return cb::Error (
1644- " missing statistics for requested model" , pa::GENERIC_ERROR);
1645- } else {
1646- // Setting server stats 0 for composing model in case of ensemble request
1647- // cache hit since the composing model will not be executed
1648- server_stats->Reset ();
1649- }
1608+ return cb::Error (
1609+ " missing statistics for requested model" , pa::GENERIC_ERROR);
16501610 } else {
16511611 uint64_t start_infer_cnt = 0 ;
16521612 uint64_t start_exec_cnt = 0 ;
0 commit comments