Skip to content

Commit 5c36658

Browse files
committed
fix(test/cpu_monitor): use psutil to get more accurate CPU measurements
We were using `top` to gather the CPU usage of Firecracker's threads. Replace it with a saner approach that uses psutil. This also fixes a bug caused by the CPU monitor outliving the test itself. Signed-off-by: Riccardo Mancini <[email protected]>
1 parent 1253957 commit 5c36658

File tree

1 file changed

+34
-26
lines changed

1 file changed

+34
-26
lines changed

tests/framework/utils.py

Lines changed: 34 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import typing
1515
from collections import defaultdict, namedtuple
1616
from contextlib import contextmanager
17+
from pathlib import Path
1718
from typing import Dict
1819

1920
import psutil
@@ -29,7 +30,6 @@
2930
FLUSH_CMD = 'screen -S {session} -X colon "logfile flush 0^M"'
3031
CommandReturn = namedtuple("CommandReturn", "returncode stdout stderr")
3132
CMDLOG = logging.getLogger("commands")
32-
GET_CPU_LOAD = "top -bn1 -H -p {} -w512 | tail -n+8"
3333

3434

3535
def get_threads(pid: int) -> dict:
@@ -56,30 +56,43 @@ def set_cpu_affinity(pid: int, cpulist: list) -> list:
5656
return psutil.Process(pid).cpu_affinity(real_cpulist)
5757

5858

59-
def get_cpu_utilization(pid: int) -> Dict[str, float]:
60-
"""Return current process per thread CPU utilization."""
61-
_, stdout, _ = check_output(GET_CPU_LOAD.format(pid))
62-
cpu_utilization = {}
59+
def get_thread_name(pid: int, tid: int) -> str:
60+
"""Return thread name from pid and tid pair."""
61+
return Path("/proc", str(pid), "task", str(tid), "comm").read_text("utf-8").strip()
62+
6363

64-
# Take all except the last line
65-
lines = stdout.strip().split(sep="\n")
66-
for line in lines:
67-
# sometimes the firecracker process will have gone away, in which case top does not return anything
68-
if not line:
69-
continue
64+
CpuTimes = namedtuple("CpuTimes", ["user", "system"])
7065

71-
info = line.strip().split()
72-
# We need at least CPU utilization and threads names cols (which
73-
# might be two cols e.g `fc_vcpu 0`).
74-
info_len = len(info)
75-
assert info_len > 11, line
7666

77-
cpu_percent = float(info[8])
67+
def get_cpu_times(pid: int) -> Dict[str, CpuTimes]:
68+
"""Return a dict mapping thread name to CPU usage (in seconds) since start."""
69+
cpu_times = {}
70+
for thread in psutil.Process(pid).threads():
71+
thread_name = get_thread_name(pid, thread.id)
72+
cpu_times[thread_name] = CpuTimes(thread.user_time, thread.system_time)
73+
return cpu_times
7874

79-
# Handles `fc_vcpu 0` case as well.
80-
thread_name = info[11] + (" " + info[12] if info_len > 12 else "")
81-
cpu_utilization[thread_name] = cpu_percent
8275

76+
def get_cpu_utilization(
77+
pid: int,
78+
interval: int = 1,
79+
split_user_system: bool = False,
80+
) -> Dict[str, float | CpuTimes]:
81+
"""Return current process per thread CPU utilization over the interval (seconds)."""
82+
cpu_utilization = {}
83+
cpu_times_before = get_cpu_times(pid)
84+
time.sleep(interval)
85+
cpu_times_after = get_cpu_times(pid)
86+
threads = set(cpu_times_before.keys()) & set(cpu_times_after.keys())
87+
for thread_name in threads:
88+
before = cpu_times_before[thread_name]
89+
after = cpu_times_after[thread_name]
90+
user = (after.user - before.user) / interval * 100
91+
system = (after.system - before.system) / interval * 100
92+
if split_user_system:
93+
cpu_utilization[thread_name] = CpuTimes(user, system)
94+
else:
95+
cpu_utilization[thread_name] = user + system
8396
return cpu_utilization
8497

8598

@@ -94,18 +107,13 @@ def track_cpu_utilization(
94107
# Sleep first `omit` seconds
95108
time.sleep(omit)
96109

97-
cpu_utilization = {}
110+
cpu_utilization = defaultdict(list)
98111
for _ in range(iterations):
99112
current_cpu_utilization = get_cpu_utilization(pid)
100113
assert len(current_cpu_utilization) > 0
101114

102115
for thread_name, value in current_cpu_utilization.items():
103-
if not cpu_utilization.get(thread_name):
104-
cpu_utilization[thread_name] = []
105116
cpu_utilization[thread_name].append(value)
106-
107-
# 1 second granularity
108-
time.sleep(1)
109117
return cpu_utilization
110118

111119

0 commit comments

Comments
 (0)