99import  logging 
1010import  os 
1111import  re 
12- import  time 
1312import  zipfile 
1413from  argparse  import  Action , ArgumentParser , Namespace 
1514from  io  import  BytesIO 
2625
2726# iOS-related regexes and variables 
2827IOS_TEST_SPEC_REGEX  =  re .compile (
29-     r"Test Case\s+'-\[(?P<test_class>\w+)\s+(?P<test_name>\w +)\]'\s+measured\s+\[(?P<metric>.+)\]\s+average:\s+(?P<value>[\d\.]+)," 
28+     r"Test Case\s+'-\[(?P<test_class>\w+)\s+(?P<test_name>[\w\+] +)\]'\s+measured\s+\[(?P<metric>.+)\]\s+average:\s+(?P<value>[\d\.]+)," 
3029)
3130IOS_TEST_NAME_REGEX  =  re .compile (
32-     r"test_(?P<method>forward|load|generate)_(?P<model_name>\w+)_pte.*iOS_(?P<ios_ver>\w+)_iPhone(?P<iphone_ver>\w+)" 
31+     r"test_(?P<method>forward|load|generate)_(?P<model_name>[\w\+]+)_pte.*iOS_(?P<ios_ver>\w+)_iPhone(?P<iphone_ver>\w+)" 
32+ )
33+ # The backend name could contain +, e.g. tinyllama_xnnpack+custom+qe_fp32
34+ IOS_MODEL_NAME_REGEX  =  re .compile (
35+     r"(?P<model>[^_]+)_(?P<backend>[\w\+]+)_(?P<dtype>\w+)" 
3336)
34- IOS_MODEL_NAME_REGEX  =  re .compile (r"(?P<model>[^_]+)_(?P<backend>\w+)_(?P<dtype>\w+)" )
3537
3638
3739class  ValidateArtifacts (Action ):
@@ -159,19 +161,8 @@ def initialize_ios_metadata(test_name: str) -> Dict[str, any]:
159161    ios_ver  =  m .group ("ios_ver" ).replace ("_" , "." )
160162    iphone_ver  =  m .group ("iphone_ver" ).replace ("_" , "." )
161163
162-     # NB: This looks brittle, but unless we can return iOS benchmark results in JSON 
163-     # format by the test, the mapping is needed to match with Android test 
164-     if  method  ==  "load" :
165-         metric  =  "model_load_time(ms)" 
166-     elif  method  ==  "forward" :
167-         metric  =  (
168-             "generate_time(ms)" 
169-             if  "llama"  in  model_name 
170-             else  "avg_inference_latency(ms)" 
171-         )
172-     elif  method  ==  "generate" :
173-         metric  =  "token_per_sec" 
174- 
164+     # The default backend and quantization dtype if the script couldn't extract 
165+     # them from the model name 
175166    backend  =  "" 
176167    quantization  =  "unknown" 
177168
@@ -194,8 +185,9 @@ def initialize_ios_metadata(test_name: str) -> Dict[str, any]:
194185            "availMem" : 0 ,
195186            "totalMem" : 0 ,
196187        },
197-         "metric " : metric ,
188+         "method " : method ,
198189        # These fields will be populated later by extract_ios_metric 
190+         "metric" : "" ,
199191        "actualValue" : 0 ,
200192        "targetValue" : 0 ,
201193    }
@@ -210,10 +202,38 @@ def extract_ios_metric(
210202    """ 
211203    Map the metric name from iOS xcresult to the benchmark result 
212204    """ 
213-     if  metric_name  ==  "Clock Monotonic Time, s" :
214-         # The benchmark value is in ms 
215-         benchmark_result ["actualValue" ] =  metric_value  *  1000 
216-     elif  metric_name  ==  "Tokens Per Second, t/s" :
205+     method  =  benchmark_result .get ("method" , "" )
206+     if  not  method :
207+         return  benchmark_result 
208+ 
209+     # NB: This looks brittle, but unless we can return iOS benchmark results in JSON 
210+     # format by the test, the mapping is needed to match with Android test 
211+     if  method  ==  "load" :
212+         if  metric_name  ==  "Clock Monotonic Time, s" :
213+             benchmark_result ["metric" ] =  "model_load_time(ms)" 
214+             benchmark_result ["actualValue" ] =  metric_value  *  1000 
215+ 
216+         elif  metric_name  ==  "Memory Peak Physical, kB" :
217+             # NB: Showing the value in MB is friendlier IMO
218+             benchmark_result ["metric" ] =  "peak_load_mem_usage(mb)" 
219+             benchmark_result ["actualValue" ] =  metric_value  /  1024 
220+ 
221+     elif  method  ==  "forward" :
222+         if  metric_name  ==  "Clock Monotonic Time, s" :
223+             benchmark_result ["metric" ] =  (
224+                 "generate_time(ms)" 
225+                 if  "llama"  in  test_name 
226+                 else  "avg_inference_latency(ms)" 
227+             )
228+             benchmark_result ["actualValue" ] =  metric_value  *  1000 
229+ 
230+         elif  metric_name  ==  "Memory Peak Physical, kB" :
231+             # NB: Showing the value in MB is friendlier IMO
232+             benchmark_result ["metric" ] =  "peak_inference_mem_usage(mb)" 
233+             benchmark_result ["actualValue" ] =  metric_value  /  1024 
234+ 
235+     elif  method  ==  "generate"  and  metric_name  ==  "Tokens Per Second, t/s" :
236+         benchmark_result ["metric" ] =  "token_per_sec" 
217237        benchmark_result ["actualValue" ] =  metric_value 
218238
219239    return  benchmark_result 
@@ -235,31 +255,33 @@ def extract_ios_benchmark_results(
235255
236256        with  request .urlopen (artifact_s3_url ) as  data :
237257            current_test_name  =  "" 
258+             current_metric_name  =  "" 
238259            current_record  =  {}
239260
240261            for  line  in  data .read ().decode ("utf8" ).splitlines ():
241262                s  =  IOS_TEST_SPEC_REGEX .search (line )
242263                if  not  s :
243264                    continue 
244265
245-                 test_class  =  s .group ("test_class" )
246266                test_name  =  s .group ("test_name" )
247267                metric_name  =  s .group ("metric" )
248268                metric_value  =  float (s .group ("value" ))
249269
250-                 if  test_name  !=  current_test_name :
251-                     if  current_record :
270+                 if  test_name  !=  current_test_name   or   metric_name   !=   current_metric_name :
271+                     if  current_record   and   current_record . get ( "metric" ,  "" ) :
252272                        # Save the benchmark result in the same format used by Android 
253273                        benchmark_results .append (current_record .copy ())
254274
255275                    current_test_name  =  test_name 
276+                     current_metric_name  =  metric_name 
256277                    current_record  =  initialize_ios_metadata (current_test_name )
257278
258279                current_record  =  extract_ios_metric (
259280                    current_record , test_name , metric_name , metric_value 
260281                )
261282
262-             benchmark_results .append (current_record .copy ())
283+             if  current_record  and  current_record .get ("metric" , "" ):
284+                 benchmark_results .append (current_record .copy ())
263285
264286        return  benchmark_results 
265287
0 commit comments