Skip to content

Commit 84d0176

Browse files
authored
Merge pull request #320 from perezjosibm/wip.report_gen
First version of the report generator. Add further tools.
2 parents fd47fea + bfe3dce commit 84d0176

File tree

7 files changed

+2030
-69
lines changed

7 files changed

+2030
-69
lines changed

tools/diskstat_diff.py

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
#!/usr/bin/python
2+
"""
3+
This script expects an input .json file name as argument, and a .json stream
4+
from stdin, and
5+
calculates their difference (producing a gnuplot .plot and .dat for it)
6+
Might generalise later for a whole set of samples (like we do with top).
7+
It could also be extended to process .json from ceph conf osd tell dump_metrics.
8+
"""
9+
10+
import argparse
11+
import logging
12+
import os
13+
import sys
14+
import re
15+
import json
16+
import tempfile
17+
18+
__author__ = "Jose J Palacios-Perez"

# Module-level logger; main() sets up the logging destination and level
# through logging.basicConfig().
logger = logging.getLogger(__name__)
21+
22+
23+
def serialize_sets(obj):
    """
    JSON "default" hook: turn a set into a list, pass anything else through.
    """
    return list(obj) if isinstance(obj, set) else obj
31+
32+
33+
class DiskStatEntry(object):
    """
    Calculate the difference between a diskstat .json file and
    a .json stream from stdin, and
    save the difference as .JSON

    The input is a list of per-device entries, as produced by
    jc --pretty /proc/diskstats
      {
        "maj": 8,
        "min": 1,
        "device": "sda1",
        "reads_completed": 43291,
        "reads_merged": 34899,
        "sectors_read": 4570338,
        "read_time_ms": 20007,
        "writes_completed": 6562480,
        "writes_merged": 9555760,
        "sectors_written": 1681486816,
        "write_time_ms": 10427489,
        "io_in_progress": 0,
        "io_time_ms": 2062151,
        "weighted_io_time_ms": 10447497,
        "discards_completed_successfully": 0,
        "discards_merged": 0,
        "sectors_discarded": 0,
        "discarding_time_ms": 0,
        "flush_requests_completed_successfully": 0,
        "flushing_time_ms": 0
      }

    Only interested in the following measurements:
    "device" "reads_completed" "read_time_ms" "writes_completed" "write_time_ms"
    """

    def __init__(self, aname: str, regex: str, directory: str):
        """
        aname: name of the baseline .json file; save_json() writes the
               result back to this same file
        regex: pattern searched for in the device names to select devices
        directory: directory that run() changes into before any file access

        The result (self._diff) is a dict with keys the device names,
        values the measurements above
        """
        self.aname = aname
        self.regex = re.compile(regex)
        # Metrics whose name ends in _time_ms get special treatment in get_diff()
        self.time_re = re.compile(r"_time_ms$")
        self.measurements = [
            "reads_completed",
            "read_time_ms",
            "writes_completed",
            "write_time_ms",
        ]
        self.directory = directory
        self._diff = {}

    def filter_metrics(self, ds):
        """
        Filter the (array of dicts) to the measurements we want, of those
        device names that match the regex.

        Returns a dict {device name: {measurement: value}}.
        Raises KeyError if an entry lacks "device" or a wanted measurement.
        """
        result = {}
        for item in ds:
            dv = item["device"]
            if self.regex.search(dv):
                # A repeated device name overwrites the earlier entry
                result[dv] = {m: item[m] for m in self.measurements}
        return result

    def get_diff(self, a_data, b_data):
        """
        Calculate the difference of b_data - a_data
        Assigns the result to self._diff

        b_data is updated in place. A device present in b_data but missing
        from a_data is reported and keeps its b_data values (that is, its
        baseline is taken as zero) instead of aborting with a KeyError.
        """
        for dev, b_metrics in b_data.items():
            baseline = a_data.get(dev)
            if baseline is None:
                logger.warning(
                    f"Device {dev} not found in the baseline, assuming zero counters"
                )
                continue
            for m in b_metrics:
                if self.time_re.search(m):
                    # NOTE(review): *_time_ms metrics are not subtracted, the
                    # larger of the two samples is kept -- confirm this is intended
                    b_metrics[m] = max(b_metrics[m], baseline[m])
                else:
                    b_metrics[m] -= baseline[m]
        self._diff = b_data

    def load_json(self, json_fname):
        """
        Load a .json file containing diskstat metrics
        Returns a dict with keys only those interested device names;
        an empty file is reported and yields an empty dict.

        Raises argparse.ArgumentTypeError if the file cannot be opened/read.
        """
        try:
            with open(json_fname, "r") as json_data:
                # check for empty file
                if os.fstat(json_data.fileno()).st_size == 0:
                    logger.error(f"JSON input file {json_fname} is empty")
                    # Same type as the normal return value, so get_diff() copes
                    return {}
                ds_list = json.load(json_data)
        except IOError as e:
            raise argparse.ArgumentTypeError(str(e)) from e
        return self.filter_metrics(ds_list)

    def save_json(self):
        """
        Save the difference, overwriting the file named self.aname
        (nothing is written if self.aname is empty)
        """
        if self.aname:
            # The with statement closes the file, no explicit close() needed
            with open(self.aname, "w", encoding="utf-8") as f:
                json.dump(
                    self._diff, f, indent=4, sort_keys=True, default=serialize_sets
                )

    def run(self):
        """
        Entry point: processes the input files, then produces the diff
        and saves it back to -a

        Changes the current working directory of the process to
        self.directory and reads the second sample from stdin.
        """
        os.chdir(self.directory)
        a_data = self.load_json(self.aname)
        b_data = self.filter_metrics(json.load(sys.stdin))
        self.get_diff(a_data, b_data)
        self.save_json()
153+
154+
155+
def main(argv):
    """
    Command line entry point.

    argv: list of command line arguments, without the program name.

    Parses the options, sends logging to a freshly created temporary file
    under /tmp (the file is kept after exit), then runs DiskStatEntry on the
    file given with -a. Note that the -a file is overwritten with the result.
    """
    # %(prog)s is the placeholder argparse expands in the epilog; the
    # optparse-style %prog would be printed literally.
    examples = """
    Examples:
    # Calculate the difference in diskstats between the start/end of a performance run:
    # jc --pretty /proc/diskstats > _start.json
    < .. run test.. >
    # jc --pretty /proc/diskstats | %(prog)s -a _start.json

    """
    parser = argparse.ArgumentParser(
        description="""This tool is used to calculate the difference in diskstat measurements""",
        epilog=examples,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "-a",
        type=str,
        required=True,
        help="Input .json file",
    )
    parser.add_argument(
        "-r",
        "--regex",
        type=str,
        required=False,
        help="Regex to describe the device names",
        default=r"nvme\d+n1p2",
    )
    parser.add_argument(
        "-d", "--directory", type=str, help="Directory to examine", default="./"
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="True to enable verbose logging mode",
        default=False,
    )

    options = parser.parse_args(argv)

    log_level = logging.DEBUG if options.verbose else logging.INFO

    # delete=False: only the unique name is needed, the logging module
    # reopens the file by name and it must survive this with block.
    with tempfile.NamedTemporaryFile(dir="/tmp", delete=False) as tmpfile:
        logging.basicConfig(filename=tmpfile.name, encoding="utf-8", level=log_level)

    logger.debug(f"Got options: {options}")

    ds_diff = DiskStatEntry(options.a, options.regex, options.directory)
    ds_diff.run()
210+
211+
212+
# Run as a script: pass the command line arguments (without the program
# name) to main().
if __name__ == "__main__":
    main(sys.argv[1:])

0 commit comments

Comments
 (0)