|
4 | 4 |
|
5 | 5 | import math |
6 | 6 | import re |
| 7 | +import time |
7 | 8 | from abc import ABC, abstractmethod |
8 | 9 | from dataclasses import dataclass |
9 | 10 | from typing import Dict, Iterable, List, Optional, Tuple |
@@ -182,6 +183,9 @@ def __init__( |
182 | 183 | self._pid = psutil.Process().pid |
183 | 184 | self._cpu_count = count_cpus() |
184 | 185 | self._process = psutil.Process(self._pid) |
| 186 | + # For process tracking: store last measurement time and CPU times |
| 187 | + self._last_measurement_time: Optional[float] = None |
| 188 | + self._last_cpu_times: Dict[int, float] = {} # pid -> total cpu time |
185 | 189 |
|
186 | 190 | if self._mode == "intel_power_gadget": |
187 | 191 | self._intel_interface = IntelPowerGadget(self._output_dir) |
@@ -245,11 +249,62 @@ def _get_power_from_cpu_load(self): |
245 | 249 | f"CPU load {self._tdp} W and {cpu_load:.1f}% {load_factor=} => estimation of {power} W for whole machine." |
246 | 250 | ) |
247 | 251 | elif self._tracking_mode == "process": |
| 252 | + # Use CPU times for accurate process tracking |
| 253 | + current_time = time.time() |
| 254 | + current_cpu_times: Dict[int, float] = {} |
| 255 | + |
| 256 | + # Get CPU time for main process and all children |
| 257 | + try: |
| 258 | + processes = [self._process] + self._process.children(recursive=True) |
| 259 | + except (psutil.NoSuchProcess, psutil.AccessDenied): |
| 260 | + processes = [self._process] |
| 261 | + |
| 262 | + for proc in processes: |
| 263 | + try: |
| 264 | + cpu_times = proc.cpu_times() |
| 265 | + # Total CPU time = user + system time |
| 266 | + total_cpu_time = cpu_times.user + cpu_times.system |
| 267 | + current_cpu_times[proc.pid] = total_cpu_time |
| 268 | + except (psutil.NoSuchProcess, psutil.AccessDenied): |
| 269 | + logger.debug( |
| 270 | + f"Process {proc.pid} disappeared or access denied when getting CPU times." |
| 271 | + ) |
| 272 | + |
| 273 | + # Calculate CPU usage based on delta |
| 274 | + if self._last_measurement_time is not None: |
| 275 | + time_delta = current_time - self._last_measurement_time |
| 276 | + if time_delta > 0: |
| 277 | + total_cpu_delta = 0.0 |
| 278 | + for pid, cpu_time in current_cpu_times.items(): |
| 279 | + last_cpu_time = self._last_cpu_times.get(pid, cpu_time) |
| 280 | + cpu_delta = cpu_time - last_cpu_time |
| 281 | + if cpu_delta > 0: |
| 282 | + total_cpu_delta += cpu_delta |
| 283 | + logger.debug( |
| 284 | + f"Process {pid} CPU time delta: {cpu_delta:.3f}s" |
| 285 | + ) |
| 286 | + |
| 287 | + # CPU load as percentage (can be > 100% with multiple cores) |
| 288 | + # total_cpu_delta is the CPU time used, time_delta is wall clock time |
| 289 | + cpu_load = (total_cpu_delta / time_delta) * 100 |
| 290 | + logger.debug( |
| 291 | + f"Total CPU delta: {total_cpu_delta:.3f}s over {time_delta:.3f}s = {cpu_load:.1f}% (across {self._cpu_count} cores)" |
| 292 | + ) |
| 293 | + else: |
| 294 | + cpu_load = 0.0 |
| 295 | + else: |
| 296 | + cpu_load = 0.0 |
| 297 | + logger.debug("First measurement, no CPU delta available yet") |
248 | 298 |
|
249 | | - cpu_load = self._process.cpu_percent(interval=0.5) / self._cpu_count |
250 | | - power = self._tdp * cpu_load / 100 |
| 299 | + # Store for next measurement |
| 300 | + self._last_measurement_time = current_time |
| 301 | + self._last_cpu_times = current_cpu_times |
| 302 | + |
| 303 | + # Normalize to percentage of total CPU capacity |
| 304 | + cpu_load_normalized = cpu_load / self._cpu_count |
| 305 | + power = self._tdp * cpu_load_normalized / 100 |
251 | 306 | logger.debug( |
252 | | - f"CPU load {self._tdp} W and {cpu_load * 100:.1f}% => estimation of {power} W for process {self._pid}." |
| 307 | + f"CPU load {self._tdp} W and {cpu_load:.1f}% ({cpu_load_normalized:.1f}% normalized) => estimation of {power:.2f} W for process {self._pid} and {len(current_cpu_times) - 1} children." |
253 | 308 | ) |
254 | 309 | else: |
255 | 310 | raise Exception(f"Unknown tracking_mode {self._tracking_mode}") |
@@ -318,9 +373,13 @@ def measure_power_and_energy(self, last_duration: float) -> Tuple[Power, Energy] |
def start(self):
    """Prepare this CPU tracker for a new round of measurements.

    Calls ``start()`` on ``self._intel_interface`` when the tracker runs in
    one of the interface-backed modes, clears the stored process-tracking
    sample (last measurement time and per-pid CPU times), and, in
    ``MODE_CPU_LOAD``, takes and discards two CPU-load power readings.

    NOTE(review): the nesting below was reconstructed from a diff view whose
    indentation was stripped; confirm the reset is meant to run for every
    mode and that both priming calls belong to the ``MODE_CPU_LOAD`` branch.
    """
    interface_modes = ("intel_power_gadget", "intel_rapl", "apple_powermetrics")
    if self._mode in interface_modes:
        self._intel_interface.start()

    # Reset process tracking state so the next reading starts from a fresh
    # baseline instead of a sample left over from an earlier run.
    self._last_measurement_time, self._last_cpu_times = None, {}

    if self._mode != MODE_CPU_LOAD:
        return

    # The first time the estimator is called it returns a meaningless 0.0
    # value that is supposed to be ignored; both priming readings are
    # thrown away.
    for _ in range(2):
        self._get_power_from_cpu_load()
324 | 383 |
|
325 | 384 | def monitor_power(self): |
326 | 385 | cpu_power = self._get_power_from_cpus() |
|
0 commit comments