Commit eac370d

Add useful functions for automation to submission checker utils
1 parent 9374cd0 · commit eac370d

File tree

1 file changed: +210 -1 lines changed
  • tools/submission/submission_checker/utils.py


tools/submission/submission_checker/utils.py

Lines changed: 210 additions & 1 deletion
@@ -1,4 +1,6 @@
 import os
+from .constants import *
+from .parsers.loadgen_parser import LoadgenParser
 
 
 def list_dir(*path):
@@ -101,4 +103,211 @@ def is_number(s):
         float(s)
         return True
     except ValueError:
-        return False
+        return False
+
+
+def get_performance_metric(
+        config, model, path, scenario_fixed):
+    # Assumes new logging format
+    version = config.version
+
+    fname = os.path.join(path, "mlperf_log_detail.txt")
+    mlperf_log = LoadgenParser(fname)
+    if (
+        "result_validity" in mlperf_log.get_keys()
+        and mlperf_log["result_validity"] == "VALID"
+    ):
+        is_valid = True
+    scenario = mlperf_log["effective_scenario"]
+
+    res = float(mlperf_log[RESULT_FIELD_NEW[version][scenario]])
+    if (
+        version in RESULT_FIELD_BENCHMARK_OVERWRITE
+        and model in RESULT_FIELD_BENCHMARK_OVERWRITE[version]
+        and scenario in RESULT_FIELD_BENCHMARK_OVERWRITE[version][model]
+    ):
+        res = float(
+            mlperf_log[RESULT_FIELD_BENCHMARK_OVERWRITE[version]
+                       [model][scenario]]
+        )
+
+    inferred = False
+    if scenario_fixed != scenario:
+        inferred, res, _ = get_inferred_result(
+            scenario_fixed, scenario, res, mlperf_log, config, False
+        )
+
+    return res
+
+
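For context, a minimal usage sketch of the new helper from an automation script — not part of the diff; the module path, the pre-built checker `config` object, the model name, and the results-directory layout are assumptions:

    # Hypothetical automation snippet; `config`, "resnet50", and the run
    # directory are placeholders, not values taken from this commit.
    from submission_checker.utils import get_performance_metric

    run_dir = "results/sut0/resnet50/Offline/performance/run_1"
    offline_metric = get_performance_metric(config, "resnet50", run_dir, "Offline")
    print("Offline performance metric:", offline_metric)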
+def get_inferred_result(
+    scenario_fixed, scenario, res, mlperf_log, config, log_error=False
+):
+
+    inferred = False
+    is_valid = True
+    # Check if current scenario (and version) uses early stopping
+    uses_early_stopping = config.uses_early_stopping(scenario)
+
+    latency_mean = mlperf_log["result_mean_latency_ns"]
+    if scenario in ["MultiStream"]:
+        latency_99_percentile = mlperf_log[
+            "result_99.00_percentile_per_query_latency_ns"
+        ]
+        latency_mean = mlperf_log["result_mean_query_latency_ns"]
+    samples_per_query = mlperf_log["effective_samples_per_query"]
+    if scenario == "SingleStream":
+        # qps_wo_loadgen_overhead is only used for inferring Offline from
+        # SingleStream; only for old submissions
+        qps_wo_loadgen_overhead = mlperf_log["result_qps_without_loadgen_overhead"]
+
+    # special case for results inferred from different scenario
+    if scenario_fixed in ["Offline"] and scenario in ["SingleStream"]:
+        inferred = True
+        res = qps_wo_loadgen_overhead
+
+    if (scenario_fixed in ["Offline"]) and scenario in ["MultiStream"]:
+        inferred = True
+        res = samples_per_query * S_TO_MS / (latency_mean / MS_TO_NS)
+
+    if (scenario_fixed in ["MultiStream"]) and scenario in ["SingleStream"]:
+        inferred = True
+        # samples_per_query does not match with the one reported in the logs
+        # when inferring MultiStream from SingleStream
+        samples_per_query = 8
+        if uses_early_stopping:
+            early_stopping_latency_ms = mlperf_log["early_stopping_latency_ms"]
+            if early_stopping_latency_ms == 0 and log_error:
+                log.error(
+                    "Not enough samples were processed for early stopping to make an estimate"
+                )
+                is_valid = False
+            res = (early_stopping_latency_ms * samples_per_query) / MS_TO_NS
+        else:
+            res = (latency_99_percentile * samples_per_query) / MS_TO_NS
+    if (scenario_fixed in ["Interactive"]) and scenario not in ["Server"]:
+        is_valid = False
+    return inferred, res, is_valid
+
+
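To make the unit conversions concrete, a small worked example with made-up numbers — it assumes `MS_TO_NS = 1000 * 1000` and `S_TO_MS = 1000` in `.constants`, which this page does not show:

    # Hypothetical latencies; the constant values below are assumptions.
    MS_TO_NS = 1000 * 1000   # nanoseconds per millisecond
    S_TO_MS = 1000           # milliseconds per second

    # MultiStream inferred from SingleStream: p99 latency scaled to 8 samples.
    latency_99_percentile = 2_500_000                        # 2.5 ms in ns
    ms_per_query = (latency_99_percentile * 8) / MS_TO_NS    # 20.0 ms/query

    # Offline inferred from MultiStream: throughput from mean query latency.
    latency_mean = 20_000_000                                # 20 ms in ns
    offline_qps = 8 * S_TO_MS / (latency_mean / MS_TO_NS)    # 400.0 samples/s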
+def check_compliance_perf_dir(test_dir):
+    is_valid = False
+    import logging
+    log = logging.getLogger("main")
+
+    fname = os.path.join(test_dir, "verify_performance.txt")
+    if not os.path.exists(fname):
+        log.error("%s is missing in %s", fname, test_dir)
+        is_valid = False
+    else:
+        with open(fname, "r") as f:
+            for line in f:
+                # look for: TEST PASS
+                if "TEST PASS" in line:
+                    is_valid = True
+                    break
+        if is_valid == False:
+            log.error(
+                "Compliance test performance check in %s failed",
+                test_dir)
+
+    # Check performance dir
+    test_perf_path = os.path.join(test_dir, "performance", "run_1")
+    if not os.path.exists(test_perf_path):
+        log.error("%s has no performance/run_1 directory", test_dir)
+        is_valid = False
+    else:
+        diff = files_diff(
+            list_files(test_perf_path),
+            REQUIRED_COMP_PER_FILES,
+            ["mlperf_log_accuracy.json"],
+        )
+        if diff:
+            log.error(
+                "%s has file list mismatch (%s)",
+                test_perf_path,
+                diff)
+            is_valid = False
+
+    return is_valid
+
+
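A sketch of how an automated submission sweep might drive the compliance check — the directory layout and the compliance test names are assumptions, not taken from this commit:

    # Hypothetical compliance sweep; paths and the test list are placeholders.
    import os
    from submission_checker.utils import check_compliance_perf_dir

    compliance_root = "compliance/sut0/resnet50/Offline"
    for test in ["TEST01", "TEST04"]:
        test_dir = os.path.join(compliance_root, test)
        if not check_compliance_perf_dir(test_dir):
            print("compliance performance check failed:", test_dir)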
+def get_power_metric(config, scenario_fixed, log_path, is_valid, res):
+    # parse the power logs
+    import datetime
+    import logging
+    log = logging.getLogger("main")
+    server_timezone = datetime.timedelta(0)
+    client_timezone = datetime.timedelta(0)
+
+    detail_log_fname = os.path.join(log_path, "mlperf_log_detail.txt")
+    mlperf_log = LoadgenParser(detail_log_fname)
+    datetime_format = "%m-%d-%Y %H:%M:%S.%f"
+    power_begin = (
+        datetime.datetime.strptime(mlperf_log["power_begin"], datetime_format)
+        + client_timezone
+    )
+    power_end = (
+        datetime.datetime.strptime(mlperf_log["power_end"], datetime_format)
+        + client_timezone
+    )
+    # Obtain the scenario also from logs to check if power is inferred
+    scenario = mlperf_log["effective_scenario"]
+
+    spl_fname = os.path.join(log_path, "spl.txt")
+    power_list = []
+    with open(spl_fname) as f:
+        for line in f:
+            if not line.startswith("Time"):
+                continue
+            timestamp = (
+                datetime.datetime.strptime(line.split(",")[1], datetime_format)
+                + server_timezone
+            )
+            if timestamp > power_begin and timestamp < power_end:
+                value = float(line.split(",")[3])
+                if value > 0:
+                    power_list.append(float(line.split(",")[3]))
+
+    if len(power_list) == 0:
+        log.error(
+            "%s has no power samples falling in power range: %s - %s",
+            spl_fname,
+            power_begin,
+            power_end,
+        )
+        is_valid = False
+    else:
+        avg_power = sum(power_list) / len(power_list)
+        power_duration = (power_end - power_begin).total_seconds()
+        if scenario_fixed in ["Offline", "Server", "Interactive"]:
+            # In Offline and Server scenarios, the power metric is in W.
+            power_metric = avg_power
+            avg_power_efficiency = res / avg_power
+
+        else:
+            # In SingleStream and MultiStream scenarios, the power metric is in
+            # mJ/query.
+            assert scenario_fixed in [
+                "MultiStream",
+                "SingleStream",
+            ], "Unknown scenario: {:}".format(scenario_fixed)
+
+            num_queries = int(mlperf_log["result_query_count"])
+
+            power_metric = avg_power * power_duration * 1000 / num_queries
+
+            if scenario_fixed in ["SingleStream"]:
+                samples_per_query = 1
+            elif scenario_fixed in ["MultiStream"]:
+                samples_per_query = 8
+
+            if (scenario_fixed in ["MultiStream"]
+                ) and scenario in ["SingleStream"]:
+                power_metric = (
+                    avg_power * power_duration * samples_per_query * 1000 / num_queries
+                )
+
+            avg_power_efficiency = (samples_per_query * 1000) / power_metric
+
+    return is_valid, power_metric, scenario, avg_power_efficiency
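Finally, a worked unit check for the power path with made-up numbers (not from any log): for the streaming scenarios the metric comes out in mJ/query and the efficiency in samples per joule, matching the samples/s divided by watts used for Offline and Server.

    # Hypothetical SingleStream power run; all numbers are placeholders.
    avg_power = 300.0        # W, mean of the in-window spl.txt samples
    power_duration = 600.0   # s, power_end - power_begin
    num_queries = 120_000    # result_query_count
    samples_per_query = 1    # SingleStream

    power_metric = avg_power * power_duration * 1000 / num_queries
    # 300 W * 600 s = 180 kJ total; / 120,000 queries = 1500.0 mJ/query

    avg_power_efficiency = (samples_per_query * 1000) / power_metric
    # 1000 / 1500 ≈ 0.667 samples per joule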

0 commit comments
