Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
* pSnapper can now run on RHEL5 equivalents (2.6.18 kernel), however with separately installed python26 or later, as I haven't "downgraded" pSnapper's python code to work with python 2.4 (yet)
- you could install python 2.6 or 2.7 manually in your own directory or use the EPEL package: (yum install epel-release ; yum install python26 )
- you will also need to uncomment the 2nd line in psn script (use #!/usr/bin/env/python26 instead of python)
- note that 2.6.18 kernel doesnt provide syscall,file name and kstack sampling (but wchan is available)
- note that 2.6.18 kernel doesnt provide syscall, file name and kstack sampling (but wchan is available)



Expand Down
80 changes: 57 additions & 23 deletions bin/schedlat
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
# Licensed under the Apache License, Version 2.0 (the "License")
#
# Name: schedlat.py (v0.2)
# Purpose: display % of time a process spent in CPU runqueue
# Purpose: display % of time a thread spent in CPU runqueue
# (scheduling latency)
# Usage: ./schedlat.py PID
# Usage: ./schedlat.py PID TID
#
# For the main process thread, i.e. the thread group leader, TID = PID
#
# %CPU shows % of time the task spent on CPU
# %LAT shows % of time the task spent trying to get onto CPU (in runqueue)
Expand All @@ -18,28 +20,60 @@ from __future__ import print_function
from datetime import datetime
import time, sys

if len(sys.argv) != 2 or sys.argv[1] == '-h':
print("usage: " + sys.argv[0] + " PID")
exit(1)
NANOSECONDS_PER_SEC = 1000000000
NANOSECONDS_PER_USEC = 1000


if len(sys.argv) != 3 or sys.argv[1] == '-h':
print("usage: " + sys.argv[0] + " PID TID")
exit(1)

pid=sys.argv[1]
pid = sys.argv[1]
tid = sys.argv[2]

with open('/proc/' + pid + '/comm', 'r') as f:
print("SchedLat by Tanel Poder [https://0x.tools]\n\nPID=" + pid + " COMM=" + f.read())

print("%-20s %6s %6s %6s" % ("TIMESTAMP", "%CPU", "%LAT", "%SLP"))

while True:
with open('/proc/' + pid + '/schedstat' , 'r') as f:
t1=time.time()
(cpu_ns1, lat_ns1, dontcare) = f.read().split()
time.sleep(1)
f.seek(0)
t2=time.time()
(cpu_ns2, lat_ns2, dontcare) = f.read().split()

cpu=(int(cpu_ns2)-int(cpu_ns1))/(t2-t1)/10000000
lat=(int(lat_ns2)-int(lat_ns1))/(t2-t1)/10000000

print("%-20s %6.1f %6.1f %6.1f" % (datetime.fromtimestamp(t2).strftime("%Y-%m-%d %H:%M:%S"), cpu, lat, 100-(cpu+lat)))
print("SchedLat by Tanel Poder [https://0x.tools]\n\nTID=%s PID=%s COMM=%s" % (tid, pid, f.read()))

print(u"%s| RUNQ LAT \u00B5s" % (" " * 42))
print("%-20s %6s %6s %6s | %7s %6s" % ("TIMESTAMP", "%CPU", "%LAT", "%SLP", "1-sec", "boot"))

count = 0
lat_us_sum = 0
try:
while True:
with open('/proc/' + pid + '/task/' + tid + '/schedstat', 'r') as f:
t1 = time.time()
(cpu_ns1, lat_ns1, slices1) = f.read().split()
lat_ns1, slices1 = int(lat_ns1), int(slices1)

time.sleep(1)
f.seek(0)

t2 = time.time()
(cpu_ns2, lat_ns2, slices2) = f.read().split()
lat_ns2, slices2 = int(lat_ns2), int(slices2)

cpu = (int(cpu_ns2) - int(cpu_ns1)) / (t2 - t1) / NANOSECONDS_PER_SEC * 100
lat = (lat_ns2 - lat_ns1) / (t2 - t1) / NANOSECONDS_PER_SEC * 100

lat_us = (lat_ns2 - lat_ns1) / (slices2 - slices1) / NANOSECONDS_PER_USEC
lat_us_boot = lat_ns2 / slices2 / NANOSECONDS_PER_USEC

lat_us_sum += lat_us
count += 1

print(
"%-20s %6.1f %6.1f %6.1f | %7.2f %6.2f"
% (
datetime.fromtimestamp(t2).strftime("%Y-%m-%d %H:%M:%S"),
cpu,
lat,
100 - (cpu + lat),
lat_us,
lat_us_boot,
)
)
except KeyboardInterrupt:
print()

print(u"Mean RUNQ LAT: %.2f \u00B5s" % (lat_us_sum / count))
133 changes: 133 additions & 0 deletions bin/schedlatsys
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#!/usr/bin/env python

# Copyright 2020 Tanel Poder
# Copyright 2024 Kristian Rados
# Licensed under the Apache License, Version 2.0

from datetime import datetime
import time, re, subprocess, argparse

NANOSECONDS_PER_SEC = 1000000000
NANOSECONDS_PER_USEC = 1000
HELP = """SchedLatSys by Kristian Rados and Tanel Poder [https://0x.tools]

%CPU = mean %time spent running tasks on the CPU
%LAT = mean %time spent waiting in the run queue (scheduler latency) by all tasks, normalized by num tasks
%SLP = 100 - (%CPU + %LAT)
"""


def main(args):
print(HELP)
print(u"%s| RUNQ LAT \u00B5s" % (" " * 42))
print(
"%-20s %6s %6s %6s | %7s %6s"
% ("TIMESTAMP", "%CPU", "%LAT", "%SLP", str(args.interval) + "-sec", "boot")
)

count = 0
lat_us_sum = 0
try:
while not args.count or count < args.count:
with open("/proc/schedstat", "r") as f:
t1 = time.time()
(cpu_ns1, lat_ns1, slices1) = parse_schedstat(f.read())

time.sleep(args.interval)
f.seek(0)

t2 = time.time()
(cpu_ns2, lat_ns2, slices2) = parse_schedstat(f.read())

cpu = (cpu_ns2 - cpu_ns1) / (t2 - t1) / NANOSECONDS_PER_SEC * 100

ps = subprocess.Popen(("ps", "-e", "--no-headers"), stdout=subprocess.PIPE)
num_tasks = subprocess.check_output(["wc", "-l"], stdin=ps.stdout)

lat_all = lat_ns2 - lat_ns1
lat_norm = lat_all / int(num_tasks)
lat = lat_norm / (t2 - t1) / NANOSECONDS_PER_SEC * 100

lat_us = (lat_ns2 - lat_ns1) / (slices2 - slices1) / NANOSECONDS_PER_USEC
lat_us_boot = lat_ns2 / slices2 / NANOSECONDS_PER_USEC

lat_us_sum += lat_us
count += 1

debug_str = (
""
if not args.verbose
else " (%.6f ms avg wait time in %.6f s normalized by num tasks, i.e. %.6f / %d)"
% (
(lat_norm) / 1000000,
t2 - t1,
lat_all / 1000000,
int(num_tasks),
)
)
print(
"%-20s %6.2f %6.2f %6.2f | %7.2f %6.2f%s"
% (
datetime.fromtimestamp(t2).strftime("%Y-%m-%d %H:%M:%S"),
cpu,
lat,
100 - (cpu + lat),
lat_us,
lat_us_boot,
debug_str,
)
)
except KeyboardInterrupt:
print()

print(u"Mean RUNQ LAT: %.2f \u00B5s" % (lat_us_sum / count))


def parse_schedstat(text):
cpus = re.findall(
"cpu(?:\d+) (?:\d+ ){6}(?P<run>\d+) (?P<wait>\d+) (?P<slices>\d+)", text
)

cpu_ns, wait_ns, slices = 0, 0, 0
for cpu in cpus:
cpu_ns += int(cpu[0])
wait_ns += int(cpu[1])
slices += int(cpu[2])

cpu_ns /= len(cpus)
# Mean time all tasks spent waiting on the run queue across all CPUs
wait_ns /= len(cpus)
slices /= len(cpus)

return cpu_ns, wait_ns, slices


def get_args():
parser = argparse.ArgumentParser()
parser.add_argument(
"-i",
"--interval",
type=int,
default=1,
help="how long to sleep in seconds before measuring /proc/schedstat",
)
parser.add_argument(
"-c",
"--count",
type=int,
default=None,
help="number of intervals to track before stopping",
)
parser.add_argument(
"-v",
"--verbose",
action="store_true",
help="display info regarding the calculation of %%LAT",
)

args = parser.parse_args()
return args


if __name__ == "__main__":
main(get_args())
2 changes: 0 additions & 2 deletions bin/syscallargs
Original file line number Diff line number Diff line change
Expand Up @@ -130,5 +130,3 @@ def main():

if __name__ == "__main__":
main()