|
36 | 36 | #include "backend_model.h" |
37 | 37 | #include "constants.h" |
38 | 38 | #include "filesystem/api.h" |
| 39 | +#include "metrics.h" |
39 | 40 | #include "model.h" |
40 | 41 | #include "model_config_utils.h" |
41 | 42 | #include "repo_agent.h" |
@@ -559,11 +560,20 @@ ModelLifeCycle::CreateModel( |
559 | 560 | // backend. |
560 | 561 | if (!model_config.backend().empty()) { |
561 | 562 | std::unique_ptr<TritonModel> model; |
| 563 | + const uint64_t model_load_ns = |
| 564 | + std::chrono::duration_cast<std::chrono::nanoseconds>( |
| 565 | + std::chrono::steady_clock::now().time_since_epoch()) |
| 566 | + .count(); |
562 | 567 | status = TritonModel::Create( |
563 | 568 | server_, model_info->model_path_, options_.backend_cmdline_config_map, |
564 | 569 | options_.host_policy_map, model_id, version, model_config, |
565 | 570 | is_config_provided, &model); |
566 | 571 | is.reset(model.release()); |
| 572 | + if (status.IsOk()) { |
| 573 | +#ifdef TRITON_ENABLE_METRICS |
| 574 | + CalculateAndReportLoadTime(model_load_ns, &is); |
| 575 | +#endif // TRITON_ENABLE_METRICS |
| 576 | + } |
567 | 577 | } else { |
568 | 578 | #ifdef TRITON_ENABLE_ENSEMBLE |
569 | 579 | if (model_info->is_ensemble_) { |
@@ -799,10 +809,8 @@ ModelLifeCycle::OnLoadFinal( |
799 | 809 | // Mark current versions ready and track info in foreground |
800 | 810 | for (auto& loaded : load_tracker->load_set_) { |
801 | 811 | std::lock_guard<std::mutex> curr_info_lk(loaded.second->mtx_); |
802 | | - |
803 | 812 | loaded.second->state_ = ModelReadyState::READY; |
804 | 813 | loaded.second->state_reason_.clear(); |
805 | | - |
806 | 814 | auto bit = background_models_.find((uintptr_t)loaded.second); |
807 | 815 | // Check if the version model is loaded in background, if so, |
808 | 816 | // replace and unload the current serving version |
@@ -847,4 +855,35 @@ ModelLifeCycle::OnLoadFinal( |
847 | 855 | } |
848 | 856 | } |
849 | 857 |
|
| 858 | +void |
| 859 | +ModelLifeCycle::CalculateAndReportLoadTime( |
| 860 | + uint64_t load_start_ns_, std::unique_ptr<Model>* model) |
| 861 | +{ |
| 862 | +#ifdef TRITON_ENABLE_METRICS |
| 863 | + auto reporter = (*model)->MetricReporter(); |
| 864 | + const uint64_t now_ns = |
| 865 | + std::chrono::duration_cast<std::chrono::nanoseconds>( |
| 866 | + std::chrono::steady_clock::now().time_since_epoch()) |
| 867 | + .count(); |
| 868 | + uint64_t time_to_load_ns = now_ns - load_start_ns_; |
| 869 | + std::chrono::duration<double> time_to_load = |
| 870 | + std::chrono::duration_cast<std::chrono::duration<double>>( |
| 871 | + std::chrono::nanoseconds(time_to_load_ns)); |
| 872 | + ReportModelLoadTime(reporter, time_to_load); |
| 873 | +#endif // TRITON_ENABLE_METRICS |
| 874 | +} |
| 875 | + |
| 876 | +void |
| 877 | +ModelLifeCycle::ReportModelLoadTime( |
| 878 | + std::shared_ptr<MetricModelReporter> reporter, |
| 879 | + const std::chrono::duration<double>& time_to_load) |
| 880 | +{ |
| 881 | +#ifdef TRITON_ENABLE_METRICS |
| 882 | + if (reporter) { |
| 883 | + double load_time_in_seconds = time_to_load.count(); |
| 884 | + reporter->SetGauge(kModelLoadTimeMetric, load_time_in_seconds); |
| 885 | + } |
| 886 | +#endif // TRITON_ENABLE_METRICS |
| 887 | +} |
| 888 | + |
850 | 889 | }} // namespace triton::core |
0 commit comments