@@ -9,7 +9,7 @@ namespace atlasagent
99
1010constexpr auto MICROS = 1000 * 1000.0 ;
1111
12- void CGroup::network_stats () noexcept
12+ void CGroup::NetworkStats () noexcept
1313{
1414 auto megabits = std::getenv (" TITUS_NUM_NETWORK_BANDWIDTH" );
1515
@@ -24,7 +24,7 @@ void CGroup::network_stats() noexcept
2424 }
2525}
2626
27- void CGroup::pressure_stall () noexcept
27+ void CGroup::PressureStall () noexcept
2828{
2929 auto lines = read_lines_fields (path_prefix_, " cpu.pressure" );
3030
@@ -58,79 +58,85 @@ void CGroup::pressure_stall() noexcept
5858 }
5959}
6060
61- void CGroup::cpu_throttle_v2 ( ) noexcept
61+ void CGroup::CpuThrottleV2 ( const std::unordered_map<std::string, int64_t >& stats ) noexcept
6262{
63- std::unordered_map<std::string, int64_t > stats;
64- parse_kv_from_file (path_prefix_, " cpu.stat" , &stats);
65-
6663 static auto prev_throttled_time = static_cast <int64_t >(-1 );
67- auto cur_throttled_time = stats[ " throttled_usec" ] ;
64+ auto cur_throttled_time = stats. at ( " throttled_usec" ) ;
6865 if (prev_throttled_time >= 0 )
6966 {
7067 auto seconds = (cur_throttled_time - prev_throttled_time) / MICROS;
7168 registry_->CreateCounter (" cgroup.cpu.throttledTime" ).Increment (seconds);
7269 }
7370 prev_throttled_time = cur_throttled_time;
7471
75- registry_->CreateMonotonicCounter (" cgroup.cpu.numThrottled" ).Set (stats[ " nr_throttled" ] );
72+ registry_->CreateMonotonicCounter (" cgroup.cpu.numThrottled" ).Set (stats. at ( " nr_throttled" ) );
7673}
7774
78- void CGroup::cpu_time_v2 ( ) noexcept
75+ void CGroup::CpuTimeV2 ( const std::unordered_map<std::string, int64_t >& stats ) noexcept
7976{
80- std::unordered_map<std::string, int64_t > stats;
81- parse_kv_from_file (path_prefix_, " cpu.stat" , &stats);
82-
8377 static auto prev_proc_time = static_cast <int64_t >(-1 );
8478 if (prev_proc_time >= 0 )
8579 {
86- auto secs = (stats[ " usage_usec" ] - prev_proc_time) / MICROS;
80+ auto secs = (stats. at ( " usage_usec" ) - prev_proc_time) / MICROS;
8781 registry_->CreateCounter (" cgroup.cpu.processingTime" ).Increment (secs);
8882 }
89- prev_proc_time = stats[ " usage_usec" ] ;
83+ prev_proc_time = stats. at ( " usage_usec" ) ;
9084
9185 static auto prev_sys_usage = static_cast <int64_t >(-1 );
9286 if (prev_sys_usage >= 0 )
9387 {
94- auto secs = (stats[ " system_usec" ] - prev_sys_usage) / MICROS;
88+ auto secs = (stats. at ( " system_usec" ) - prev_sys_usage) / MICROS;
9589 registry_->CreateCounter (" cgroup.cpu.usageTime" , {{" id" , " system" }}).Increment (secs);
90+
9691 }
97- prev_sys_usage = stats[ " system_usec" ] ;
92+ prev_sys_usage = stats. at ( " system_usec" ) ;
9893
9994 static auto prev_user_usage = static_cast <int64_t >(-1 );
10095 if (prev_user_usage >= 0 )
10196 {
102- auto secs = (stats[ " user_usec" ] - prev_user_usage) / MICROS;
97+ auto secs = (stats. at ( " user_usec" ) - prev_user_usage) / MICROS;
10398 registry_->CreateCounter (" cgroup.cpu.usageTime" , {{" id" , " user" }}).Increment (secs);
10499 }
105- prev_user_usage = stats[ " user_usec" ] ;
100+ prev_user_usage = stats. at ( " user_usec" ) ;
106101}
107102
108- double CGroup::get_avail_cpu_time ( double delta_t , double num_cpu ) noexcept
103+ double CGroup::GetAvailCpuTime ( const double delta_t , const double cpuCount ) noexcept
109104{
110105 auto cpu_max = read_num_vector_from_file (path_prefix_, " cpu.max" );
111106 auto cfs_period = cpu_max[1 ];
112- auto cfs_quota = cfs_period * num_cpu ;
107+ auto cfs_quota = cfs_period * cpuCount ;
113108 return (delta_t / cfs_period) * cfs_quota;
114109}
115110
116- double CGroup::get_num_cpu () noexcept
111+ double CGroup::GetNumCpu () noexcept
117112{
118113 auto env_num_cpu = std::getenv (" TITUS_NUM_CPU" );
119- auto num_cpu = 0.0 ;
114+ auto cpuCount = 0.0 ;
120115 if (env_num_cpu != nullptr )
121116 {
122- num_cpu = strtod (env_num_cpu, nullptr );
117+ cpuCount = strtod (env_num_cpu, nullptr );
118+ }
119+ return cpuCount;
120+ }
121+
122+ void CGroup::CpuProcessingCapacity (const absl::Time& now, const double cpuCount, const absl::Duration& interval) noexcept
123+ {
124+ static absl::Time last_updated;
125+ if (last_updated == absl::UnixEpoch ())
126+ {
127+ last_updated = now - interval;
123128 }
124- return num_cpu;
129+ auto delta_t = absl::ToDoubleSeconds (now - last_updated);
130+ last_updated = now;
131+ registry_->CreateCounter (" cgroup.cpu.processingCapacity" ).Increment (delta_t * cpuCount);
125132}
126133
127- void CGroup::cpu_utilization_v2 ( absl::Time now) noexcept
134+ void CGroup::CpuUtilizationV2 ( const absl::Time& now, const double cpuCount, const absl::Duration& interval ) noexcept
128135{
129136 static absl::Time last_updated;
130137 if (last_updated == absl::UnixEpoch ())
131138 {
132- // ensure cgroup.cpu.processingCapacity has a consistent value after one sample
133- last_updated = now - update_interval_;
139+ last_updated = now - interval;
134140 }
135141 auto delta_t = absl::ToDoubleSeconds (now - last_updated);
136142 last_updated = now;
@@ -141,12 +147,9 @@ void CGroup::cpu_utilization_v2(absl::Time now) noexcept
141147 registry_->CreateGauge (" cgroup.cpu.weight" ).Set (weight);
142148 }
143149
144- auto num_cpu = get_num_cpu ();
145- auto avail_cpu_time = get_avail_cpu_time (delta_t , num_cpu);
146-
147- registry_->CreateCounter (" cgroup.cpu.processingCapacity" ).Increment (delta_t * num_cpu);
148- registry_->CreateGauge (" sys.cpu.numProcessors" ).Set (num_cpu);
149- registry_->CreateGauge (" titus.cpu.requested" ).Set (num_cpu);
150+ auto avail_cpu_time = GetAvailCpuTime (delta_t , cpuCount);
151+ registry_->CreateGauge (" sys.cpu.numProcessors" ).Set (cpuCount);
152+ registry_->CreateGauge (" titus.cpu.requested" ).Set (cpuCount);
150153
151154 std::unordered_map<std::string, int64_t > stats;
152155 parse_kv_from_file (path_prefix_, " cpu.stat" , &stats);
@@ -168,36 +171,57 @@ void CGroup::cpu_utilization_v2(absl::Time now) noexcept
168171 prev_user_time = stats[" user_usec" ];
169172}
170173
171- void CGroup::cpu_peak_utilization_v2 (absl::Time now) noexcept
174+ void CGroup::CpuPeakUtilizationV2 (const absl::Time& now, const std::unordered_map<std::string, int64_t >& stats,
175+ const double cpuCount) noexcept
172176{
173177 static absl::Time last_updated;
174178 auto delta_t = absl::ToDoubleSeconds (now - last_updated);
175179 last_updated = now;
176180
177- auto num_cpu = get_num_cpu ();
178- auto avail_cpu_time = get_avail_cpu_time (delta_t , num_cpu);
179-
180- std::unordered_map<std::string, int64_t > stats;
181- parse_kv_from_file (path_prefix_, " cpu.stat" , &stats);
181+ auto avail_cpu_time = GetAvailCpuTime (delta_t , cpuCount);
182182
183183 static auto prev_system_time = static_cast <int64_t >(-1 );
184184 if (prev_system_time >= 0 )
185185 {
186- auto secs = (stats[ " system_usec" ] - prev_system_time) / MICROS;
186+ auto secs = (stats. at ( " system_usec" ) - prev_system_time) / MICROS;
187187 registry_->CreateMaxGauge (" sys.cpu.peakUtilization" , {{" id" , " system" }}).Set ((secs / avail_cpu_time) * 100 );
188188 }
189- prev_system_time = stats[ " system_usec" ] ;
189+ prev_system_time = stats. at ( " system_usec" ) ;
190190
191191 static auto prev_user_time = static_cast <int64_t >(-1 );
192192 if (prev_user_time >= 0 )
193193 {
194- auto secs = (stats[ " user_usec" ] - prev_user_time) / MICROS;
194+ auto secs = (stats. at ( " user_usec" ) - prev_user_time) / MICROS;
195195 registry_->CreateMaxGauge (" sys.cpu.peakUtilization" , {{" id" , " user" }}).Set ((secs / avail_cpu_time) * 100 );
196196 }
197- prev_user_time = stats[" user_usec" ];
197+ prev_user_time = stats.at (" user_usec" );
198+ }
199+
200+ void CGroup::CpuStats (const bool fiveSecondMetricsEnabled, const bool sixtySecondMetricsEnabled)
201+ {
202+ std::unordered_map<std::string, int64_t > stats;
203+ parse_kv_from_file (path_prefix_, " cpu.stat" , &stats);
204+ auto cpuCount = GetNumCpu ();
205+
206+ // Collect 60 second metrics if enabled
207+ if (sixtySecondMetricsEnabled)
208+ {
209+ CpuThrottleV2 (stats);
210+ CpuUtilizationV2 (absl::Now (), cpuCount, absl::Seconds (60 ));
211+ }
212+
213+ // Collect 5 second metrics if enabled
214+ if (fiveSecondMetricsEnabled)
215+ {
216+ CpuTimeV2 (stats);
217+ CpuProcessingCapacity (absl::Now (), cpuCount, absl::Seconds (5 ));
218+ }
219+
220+ // Always collect peak stats (called every 1 second)
221+ CpuPeakUtilizationV2 (absl::Now (), stats, cpuCount);
198222}
199223
200- void CGroup::memory_stats_v2 () noexcept
224+ void CGroup::MemoryStatsV2 () noexcept
201225{
202226 auto usage_bytes = read_num_from_file (path_prefix_, " memory.current" );
203227 if (usage_bytes >= 0 )
@@ -237,7 +261,7 @@ void CGroup::memory_stats_v2() noexcept
237261 registry_->CreateMonotonicCounter (" cgroup.mem.pageFaults" , {{" id" , " major" }}).Set (stats[" pgmajfault" ]);
238262}
239263
240- void CGroup::memory_stats_std_v2 () noexcept
264+ void CGroup::MemoryStatsStdV2 () noexcept
241265{
242266 auto mem_limit = read_num_from_file (path_prefix_, " memory.max" );
243267 auto mem_usage = read_num_from_file (path_prefix_, " memory.current" );
@@ -271,13 +295,4 @@ void CGroup::memory_stats_std_v2() noexcept
271295 }
272296}
273297
274- void CGroup::do_cpu_stats (absl::Time now) noexcept
275- {
276- cpu_throttle_v2 ();
277- cpu_time_v2 ();
278- cpu_utilization_v2 (now);
279- }
280-
281- void CGroup::do_cpu_peak_stats (absl::Time now) noexcept { cpu_peak_utilization_v2 (now); }
282-
283298} // namespace atlasagent
0 commit comments