
Commit 5e44991

huydhn authored and facebook-github-bot committed
Upload iOS peak memory usage metric (#6282)
Summary: This extracts the iOS `Memory Peak Physical, kB` metric and uses it as the benchmark `peak_mem_usage(mb)`. I convert the value to MB because it feels friendlier to humans looking at the dashboard, but let me know if we need kB granularity.

This also fixes a small regex bug when parsing a model name like `tinyllama_xnnpack+custom+qe_fp32`, where the backend can include `+`. That bug caused the missing-key error in https://github.com/pytorch/executorch/actions/runs/11337625751/job/31531760841#step:7:112: the script failed to parse `tinyllama_xnnpack+custom+qe_fp32` and got back no benchmark records.

Pull Request resolved: #6282

Reviewed By: guangy10

Differential Revision: D64453877

Pulled By: huydhn

fbshipit-source-id: 7d0ad78027a527d9a42243790000787dfcc39e7b
1 parent c242c4c commit 5e44991

File tree

2 files changed: +49 -27 lines

.github/scripts/extract_benchmark_results.py
.github/workflows/apple-perf.yml

.github/scripts/extract_benchmark_results.py

Lines changed: 48 additions & 26 deletions
```diff
@@ -9,7 +9,6 @@
 import logging
 import os
 import re
-import time
 import zipfile
 from argparse import Action, ArgumentParser, Namespace
 from io import BytesIO
```
```diff
@@ -26,12 +25,15 @@
 
 # iOS-related regexes and variables
 IOS_TEST_SPEC_REGEX = re.compile(
-    r"Test Case\s+'-\[(?P<test_class>\w+)\s+(?P<test_name>\w+)\]'\s+measured\s+\[(?P<metric>.+)\]\s+average:\s+(?P<value>[\d\.]+),"
+    r"Test Case\s+'-\[(?P<test_class>\w+)\s+(?P<test_name>[\w\+]+)\]'\s+measured\s+\[(?P<metric>.+)\]\s+average:\s+(?P<value>[\d\.]+),"
 )
 IOS_TEST_NAME_REGEX = re.compile(
-    r"test_(?P<method>forward|load|generate)_(?P<model_name>\w+)_pte.*iOS_(?P<ios_ver>\w+)_iPhone(?P<iphone_ver>\w+)"
+    r"test_(?P<method>forward|load|generate)_(?P<model_name>[\w\+]+)_pte.*iOS_(?P<ios_ver>\w+)_iPhone(?P<iphone_ver>\w+)"
+)
+# The backend name could contain +, i.e. tinyllama_xnnpack+custom+qe_fp32
+IOS_MODEL_NAME_REGEX = re.compile(
+    r"(?P<model>[^_]+)_(?P<backend>[\w\+]+)_(?P<dtype>\w+)"
 )
-IOS_MODEL_NAME_REGEX = re.compile(r"(?P<model>[^_]+)_(?P<backend>\w+)_(?P<dtype>\w+)")
 
 
 class ValidateArtifacts(Action):
```
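The `\w` character class does not match `+`, which is why the old `IOS_MODEL_NAME_REGEX` silently failed on model names such as `tinyllama_xnnpack+custom+qe_fp32`. A quick standalone check of the before/after behavior (the old pattern is reproduced here only for comparison):

```python
import re

# Old pattern: backend limited to \w, so a "+" in the backend breaks the match
OLD_MODEL_NAME_REGEX = re.compile(r"(?P<model>[^_]+)_(?P<backend>\w+)_(?P<dtype>\w+)")
# New pattern: backend may contain "+"
NEW_MODEL_NAME_REGEX = re.compile(r"(?P<model>[^_]+)_(?P<backend>[\w\+]+)_(?P<dtype>\w+)")

name = "tinyllama_xnnpack+custom+qe_fp32"
print(OLD_MODEL_NAME_REGEX.match(name))  # None: no way to span "xnnpack+custom+qe"
m = NEW_MODEL_NAME_REGEX.match(name)
print(m.group("model"), m.group("backend"), m.group("dtype"))
# tinyllama xnnpack+custom+qe fp32
```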
```diff
@@ -159,19 +161,8 @@ def initialize_ios_metadata(test_name: str) -> Dict[str, any]:
     ios_ver = m.group("ios_ver").replace("_", ".")
     iphone_ver = m.group("iphone_ver").replace("_", ".")
 
-    # NB: This looks brittle, but unless we can return iOS benchmark results in JSON
-    # format by the test, the mapping is needed to match with Android test
-    if method == "load":
-        metric = "model_load_time(ms)"
-    elif method == "forward":
-        metric = (
-            "generate_time(ms)"
-            if "llama" in model_name
-            else "avg_inference_latency(ms)"
-        )
-    elif method == "generate":
-        metric = "token_per_sec"
-
+    # The default backend and quantization dtype if the script couldn't extract
+    # them from the model name
     backend = ""
     quantization = "unknown"
 
@@ -194,8 +185,9 @@ def initialize_ios_metadata(test_name: str) -> Dict[str, any]:
             "availMem": 0,
             "totalMem": 0,
         },
-        "metric": metric,
+        "method": method,
         # These fields will be populated later by extract_ios_metric
+        "metric": "",
         "actualValue": 0,
         "targetValue": 0,
     }
```
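With this change, `initialize_ios_metadata` no longer guesses the metric up front; it records the raw test `method` and leaves `metric` empty for `extract_ios_metric` to fill in, one record per metric line. A hedged sketch of the intended flow (assuming the helper is importable from the script; only fields visible in the hunks above are asserted, and the test name is hypothetical):

```python
# Hypothetical test name following the pattern IOS_TEST_NAME_REGEX expects
record = initialize_ios_metadata(
    "test_load_tinyllama_xnnpack+custom+qe_fp32_pte_iOS_17_2_1_iPhone15_4"
)
assert record["method"] == "load"  # carried along for extract_ios_metric
assert record["metric"] == ""      # populated later, per metric line
```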
```diff
@@ -210,10 +202,38 @@ def extract_ios_metric(
     """
     Map the metric name from iOS xcresult to the benchmark result
     """
-    if metric_name == "Clock Monotonic Time, s":
-        # The benchmark value is in ms
-        benchmark_result["actualValue"] = metric_value * 1000
-    elif metric_name == "Tokens Per Second, t/s":
+    method = benchmark_result.get("method", "")
+    if not method:
+        return benchmark_result
+
+    # NB: This looks brittle, but unless we can return iOS benchmark results in JSON
+    # format by the test, the mapping is needed to match with Android test
+    if method == "load":
+        if metric_name == "Clock Monotonic Time, s":
+            benchmark_result["metric"] = "model_load_time(ms)"
+            benchmark_result["actualValue"] = metric_value * 1000
+
+        elif metric_name == "Memory Peak Physical, kB":
+            # NB: Showing the value in mB is friendlier IMO
+            benchmark_result["metric"] = "peak_load_mem_usage(mb)"
+            benchmark_result["actualValue"] = metric_value / 1024
+
+    elif method == "forward":
+        if metric_name == "Clock Monotonic Time, s":
+            benchmark_result["metric"] = (
+                "generate_time(ms)"
+                if "llama" in test_name
+                else "avg_inference_latency(ms)"
+            )
+            benchmark_result["actualValue"] = metric_value * 1000
+
+        elif metric_name == "Memory Peak Physical, kB":
+            # NB: Showing the value in mB is friendlier IMO
+            benchmark_result["metric"] = "peak_inference_mem_usage(mb)"
+            benchmark_result["actualValue"] = metric_value / 1024
+
+    elif method == "generate" and metric_name == "Tokens Per Second, t/s":
+        benchmark_result["metric"] = "token_per_sec"
         benchmark_result["actualValue"] = metric_value
 
     return benchmark_result
```
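For instance, a forward-pass memory sample would now be mapped like this (a sketch with made-up values; the positional argument order follows the call site in `extract_ios_benchmark_results`):

```python
record = {"method": "forward", "metric": "", "actualValue": 0, "targetValue": 0}
record = extract_ios_metric(
    record,
    "test_forward_mv3_xnnpack_fp16_pte_iOS_17_2_1_iPhone15_4",  # test_name
    "Memory Peak Physical, kB",                                 # metric_name
    51200.0,                                                    # metric_value
)
assert record["metric"] == "peak_inference_mem_usage(mb)"
assert record["actualValue"] == 50.0  # 51200 kB / 1024 = 50 MB
```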
```diff
@@ -235,31 +255,33 @@ def extract_ios_benchmark_results(
 
     with request.urlopen(artifact_s3_url) as data:
         current_test_name = ""
+        current_metric_name = ""
         current_record = {}
 
         for line in data.read().decode("utf8").splitlines():
             s = IOS_TEST_SPEC_REGEX.search(line)
             if not s:
                 continue
 
-            test_class = s.group("test_class")
             test_name = s.group("test_name")
             metric_name = s.group("metric")
             metric_value = float(s.group("value"))
 
-            if test_name != current_test_name:
-                if current_record:
+            if test_name != current_test_name or metric_name != current_metric_name:
+                if current_record and current_record.get("metric", ""):
                     # Save the benchmark result in the same format used by Android
                     benchmark_results.append(current_record.copy())
 
                 current_test_name = test_name
+                current_metric_name = metric_name
                 current_record = initialize_ios_metadata(current_test_name)
 
             current_record = extract_ios_metric(
                 current_record, test_name, metric_name, metric_value
             )
 
-        benchmark_results.append(current_record.copy())
+        if current_record and current_record.get("metric", ""):
+            benchmark_results.append(current_record.copy())
 
     return benchmark_results
```
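For reference, a synthetic log line in the shape `IOS_TEST_SPEC_REGEX` expects (real xcresult output carries more fields after the average, elided here):

```python
line = (
    "Test Case '-[Benchmark "
    "test_forward_tinyllama_xnnpack+custom+qe_fp32_pte_iOS_17_2_1_iPhone15_4]' "
    "measured [Memory Peak Physical, kB] average: 51200.000,"
)
s = IOS_TEST_SPEC_REGEX.search(line)
assert s is not None
assert s.group("metric") == "Memory Peak Physical, kB"
assert float(s.group("value")) == 51200.0
```

Because records are now flushed once per `(test_name, metric_name)` pair and only when a `metric` has actually been set, an unparseable test name no longer yields an empty record, which was the cause of the missing-key error linked in the summary.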

.github/workflows/apple-perf.yml

Lines changed: 1 addition & 1 deletion
```diff
@@ -76,7 +76,7 @@ jobs:
        # on-demand and periodic benchmarking.
        CRON_DEFAULT_MODELS: "stories110M,mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l"
        CRON_DEFAULT_DEVICES: "apple_iphone_15"
-       CRON_DEFAULT_DELEGATES: "xnnpack,coreml,mps"
+       CRON_DEFAULT_DELEGATES: "nnpack,coreml,mps"
      run: |
        set -ex
        MODELS="${{ inputs.models }}"
```
