 import os
+from .constants import *
+from .parsers.loadgen_parser import LoadgenParser


 def list_dir(*path):
@@ -101,4 +103,211 @@ def is_number(s):
         float(s)
         return True
     except ValueError:
-        return False
+        return False
+
+
+def get_performance_metric(
+        config, model, path, scenario_fixed):
+    # Assumes new logging format
+    version = config.version
+
+    fname = os.path.join(path, "mlperf_log_detail.txt")
+    mlperf_log = LoadgenParser(fname)
+    if (
+        "result_validity" in mlperf_log.get_keys()
+        and mlperf_log["result_validity"] == "VALID"
+    ):
+        is_valid = True
+    scenario = mlperf_log["effective_scenario"]
+
+    # Look up the headline metric key for this version and scenario
+    res = float(mlperf_log[RESULT_FIELD_NEW[version][scenario]])
+    # Some benchmarks override the default metric key for a scenario
+    if (
+        version in RESULT_FIELD_BENCHMARK_OVERWRITE
+        and model in RESULT_FIELD_BENCHMARK_OVERWRITE[version]
+        and scenario in RESULT_FIELD_BENCHMARK_OVERWRITE[version][model]
+    ):
+        res = float(
+            mlperf_log[RESULT_FIELD_BENCHMARK_OVERWRITE[version][model][scenario]]
+        )
+
+    inferred = False
+    if scenario_fixed != scenario:
+        inferred, res, _ = get_inferred_result(
+            scenario_fixed, scenario, res, mlperf_log, config, False
+        )
+
+    return res
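+
+# Usage sketch (hypothetical config object and results path): returns the
+# headline metric for one performance run, e.g. samples/s for Offline:
+#   res = get_performance_metric(
+#       config, "resnet", "results/sys1/resnet/Offline/performance/run_1",
+#       "Offline")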
+
+
+def get_inferred_result(
+    scenario_fixed, scenario, res, mlperf_log, config, log_error=False
+):
+    import logging
+    log = logging.getLogger("main")
+
+    inferred = False
+    is_valid = True
+    # Check if the current scenario (and version) uses early stopping
+    uses_early_stopping = config.uses_early_stopping(scenario)
+
+    # 99th-percentile latency; overridden with the per-query key for MultiStream
+    latency_99_percentile = mlperf_log["result_99.00_percentile_latency_ns"]
+    latency_mean = mlperf_log["result_mean_latency_ns"]
+    if scenario in ["MultiStream"]:
+        latency_99_percentile = mlperf_log[
+            "result_99.00_percentile_per_query_latency_ns"
+        ]
+        latency_mean = mlperf_log["result_mean_query_latency_ns"]
+    samples_per_query = mlperf_log["effective_samples_per_query"]
+    if scenario == "SingleStream":
+        # qps_wo_loadgen_overhead is only used for inferring Offline from
+        # SingleStream, and only for old submissions
+        qps_wo_loadgen_overhead = mlperf_log["result_qps_without_loadgen_overhead"]
+
+    # Special cases for results inferred from a different scenario
+    if scenario_fixed in ["Offline"] and scenario in ["SingleStream"]:
+        inferred = True
+        res = qps_wo_loadgen_overhead
+
+    if scenario_fixed in ["Offline"] and scenario in ["MultiStream"]:
+        inferred = True
+        res = samples_per_query * S_TO_MS / (latency_mean / MS_TO_NS)
+
+    if scenario_fixed in ["MultiStream"] and scenario in ["SingleStream"]:
+        inferred = True
+        # samples_per_query does not match the one reported in the logs
+        # when inferring MultiStream from SingleStream
+        samples_per_query = 8
+        if uses_early_stopping:
+            early_stopping_latency_ms = mlperf_log["early_stopping_latency_ms"]
+            if early_stopping_latency_ms == 0:
+                if log_error:
+                    log.error(
+                        "Not enough samples were processed for early stopping "
+                        "to make an estimate"
+                    )
+                is_valid = False
+            res = (early_stopping_latency_ms * samples_per_query) / MS_TO_NS
+        else:
+            res = (latency_99_percentile * samples_per_query) / MS_TO_NS
+
+    if scenario_fixed in ["Interactive"] and scenario not in ["Server"]:
+        is_valid = False
+    return inferred, res, is_valid
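+
+# Worked examples of the inference math above (hypothetical numbers, assuming
+# the usual constants S_TO_MS = 1000 and MS_TO_NS = 1000 * 1000):
+#   Offline inferred from MultiStream: 8 samples/query at a 4 ms mean query
+#   latency gives res = 8 * 1000 / (4_000_000 / 1_000_000) = 2000.0 samples/s.
+#   MultiStream inferred from SingleStream with early stopping: an
+#   early-stopping latency of 2_500_000 ns and the fixed samples_per_query = 8
+#   give res = (2_500_000 * 8) / 1_000_000 = 20.0 ms.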
+
+
+def check_compliance_perf_dir(test_dir):
+    import logging
+    log = logging.getLogger("main")
+
+    is_valid = False
+    fname = os.path.join(test_dir, "verify_performance.txt")
+    if not os.path.exists(fname):
+        log.error("%s is missing in %s", fname, test_dir)
+        is_valid = False
+    else:
+        with open(fname, "r") as f:
+            for line in f:
+                # look for: TEST PASS
+                if "TEST PASS" in line:
+                    is_valid = True
+                    break
+        if not is_valid:
+            log.error(
+                "Compliance test performance check in %s failed",
+                test_dir,
+            )
+
+    # Check performance dir
+    test_perf_path = os.path.join(test_dir, "performance", "run_1")
+    if not os.path.exists(test_perf_path):
+        log.error("%s has no performance/run_1 directory", test_dir)
+        is_valid = False
+    else:
+        diff = files_diff(
+            list_files(test_perf_path),
+            REQUIRED_COMP_PER_FILES,
+            ["mlperf_log_accuracy.json"],
+        )
+        if diff:
+            log.error(
+                "%s has file list mismatch (%s)",
+                test_perf_path,
+                diff,
+            )
+            is_valid = False
+
+    return is_valid
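+
+# Usage sketch (hypothetical submission layout): point this at one compliance
+# test directory containing verify_performance.txt and performance/run_1:
+#   ok = check_compliance_perf_dir("compliance/sys1/resnet/Offline/TEST01")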
+
+
+def get_power_metric(config, scenario_fixed, log_path, is_valid, res):
+    # Parse the power logs
+    import datetime
+    import logging
+    log = logging.getLogger("main")
+
+    server_timezone = datetime.timedelta(0)
+    client_timezone = datetime.timedelta(0)
+
+    detail_log_fname = os.path.join(log_path, "mlperf_log_detail.txt")
+    mlperf_log = LoadgenParser(detail_log_fname)
+    datetime_format = "%m-%d-%Y %H:%M:%S.%f"
+    power_begin = (
+        datetime.datetime.strptime(mlperf_log["power_begin"], datetime_format)
+        + client_timezone
+    )
+    power_end = (
+        datetime.datetime.strptime(mlperf_log["power_end"], datetime_format)
+        + client_timezone
+    )
+    # Also read the scenario from the logs to check whether power is inferred
+    scenario = mlperf_log["effective_scenario"]
+
+    spl_fname = os.path.join(log_path, "spl.txt")
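+    # Each power sample line in spl.txt is assumed to look like (fields
+    # inferred from the parsing below; other columns are unused here):
+    #   Time,<MM-DD-YYYY HH:MM:SS.ffffff>,<label>,<watts>,...
+    # i.e. field 1 is the sample timestamp and field 3 is the power reading in W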
+    power_list = []
+    with open(spl_fname) as f:
+        for line in f:
+            if not line.startswith("Time"):
+                continue
+            timestamp = (
+                datetime.datetime.strptime(line.split(",")[1], datetime_format)
+                + server_timezone
+            )
+            if timestamp > power_begin and timestamp < power_end:
+                value = float(line.split(",")[3])
+                if value > 0:
+                    power_list.append(value)
+
+    # Defaults, so all return values are defined even when no samples are found
+    power_metric = 0
+    avg_power_efficiency = 0
+    if len(power_list) == 0:
+        log.error(
+            "%s has no power samples falling in power range: %s - %s",
+            spl_fname,
+            power_begin,
+            power_end,
+        )
+        is_valid = False
+    else:
+        avg_power = sum(power_list) / len(power_list)
+        power_duration = (power_end - power_begin).total_seconds()
+        if scenario_fixed in ["Offline", "Server", "Interactive"]:
+            # In the Offline, Server, and Interactive scenarios, the power
+            # metric is in W
+            power_metric = avg_power
+            avg_power_efficiency = res / avg_power
+        else:
+            # In the SingleStream and MultiStream scenarios, the power metric
+            # is in mJ/query
+            assert scenario_fixed in [
+                "MultiStream",
+                "SingleStream",
+            ], "Unknown scenario: {:}".format(scenario_fixed)
+
+            num_queries = int(mlperf_log["result_query_count"])
+            power_metric = avg_power * power_duration * 1000 / num_queries
+
+            if scenario_fixed in ["SingleStream"]:
+                samples_per_query = 1
+            elif scenario_fixed in ["MultiStream"]:
+                samples_per_query = 8
+
+            # When MultiStream is inferred from a SingleStream run, scale the
+            # energy per query by the fixed samples_per_query
+            if scenario_fixed in ["MultiStream"] and scenario in ["SingleStream"]:
+                power_metric = (
+                    avg_power * power_duration * samples_per_query * 1000 / num_queries
+                )
+
+            avg_power_efficiency = (samples_per_query * 1000) / power_metric
+
+    return is_valid, power_metric, scenario, avg_power_efficiency
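+
+# Worked example of the mJ/query math above (hypothetical numbers): a
+# SingleStream run averaging 300 W over a 600 s power window with
+# result_query_count = 90000 gives
+#   power_metric = 300 * 600 * 1000 / 90000 = 2000.0 mJ/query
+# and avg_power_efficiency = (1 * 1000) / 2000.0 = 0.5 samples/J.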