[Benchmark] Generate benchmark record for job failure #9247
@@ -14,7 +14,7 @@
 from argparse import Action, ArgumentParser, Namespace
 from io import BytesIO
 from logging import info, warning
-from typing import Any, Dict, List, Optional
+from typing import Any, DefaultDict, Dict, List, Optional
 from urllib import error, request
@@ -94,12 +94,18 @@ def parse_args() -> Any:
         help="the directory to keep the benchmark configs",
     )

+    parser.add_argument(
+        "--app",
+        type=str,
+        required=True,
+        choices=["android", "ios"],
+        help="the type of app (ios or android); mainly used to generate a default record when a job fails",
+    )
+
     return parser.parse_args()


-def extract_android_benchmark_results(
-    job_name: str, artifact_type: str, artifact_s3_url: str
-) -> List:
+def extract_android_benchmark_results(artifact_type: str, artifact_s3_url: str) -> List:
     """
     The benchmark results from Android have already been stored in CUSTOMER_ARTIFACT
     artifact, so we will just need to get it
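With the new required flag, an invocation would look something like the following; the script name and the flags other than --app are inferred from parse_args and the args attributes used elsewhere in this diff, so treat them as assumptions:

    python3 extract_benchmark_results.py \
        --artifacts artifacts.json \
        --output-dir benchmark-results \
        --benchmark-configs benchmark-configs \
        --app android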
@@ -220,9 +226,7 @@ def extract_ios_metric(
     return benchmark_result


-def extract_ios_benchmark_results(
-    job_name: str, artifact_type: str, artifact_s3_url: str
-) -> List:
+def extract_ios_benchmark_results(artifact_type: str, artifact_s3_url: str) -> List:
     """
     The benchmark results from iOS are currently from xcresult, which could either
     be parsed from CUSTOMER_ARTIFACT or get from the test spec output. The latter
@@ -385,54 +389,293 @@ def transform(
     ]


-def main() -> None:
-    args = parse_args()
+def extract_model_info(git_job_name: str) -> Optional[Dict[str, str]]:
+    """
+    Get model information from git_job_name, for example:
+    benchmark-on-device (ic4, qnn_q8, samsung_galaxy_s24, arn:aws:devicefarm:us-west-2:308535385114:d... / mobile-job (android)
+    benchmark-on-device (llama, xnnpack_q8, apple_iphone_15, arn:aws:devicefarm:us-west-2:30853538511... / mobile-job (ios)
+    """
+    # Extract the content inside the first parentheses
+    pattern = r"benchmark-on-device \((.+)"
+    match = re.search(pattern, git_job_name)
+    if not match:
+        warning(
+            f"pattern not found in git_job_name {git_job_name}, cannot extract correct names"
+        )
+        return None
+
+    extracted_content = match.group(1)  # Get the content after the opening parenthesis
+    items = extracted_content.split(",")
+    if len(items) < 3:
+        warning(
+            f"expected at least 3 items extracted from git_job_name {git_job_name}, but got {items}"
+        )
+        return None
+
-    # Across all devices, keeping both schemas for now until ExecuTorch dashboard migrates to v3
-    all_benchmark_results = []
-    benchmark_config = {}
+    return {
+        "model_name": items[0].strip(),
+        "model_backend": items[1].strip(),
+        "device_pool_name": items[2].strip(),
+    }
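For concreteness, this is what extract_model_info returns for the docstring's first sample job name; a minimal sketch whose behavior follows directly from the regex and split above:

    job_name = (
        "benchmark-on-device (ic4, qnn_q8, samsung_galaxy_s24, "
        "arn:aws:devicefarm:us-west-2:308535385114:d...) / mobile-job (android)"
    )
    # The regex grabs everything after "benchmark-on-device (", then the first
    # three comma-separated items are taken as model/backend/device pool.
    info = extract_model_info(job_name)
    assert info == {
        "model_name": "ic4",
        "model_backend": "qnn_q8",
        "device_pool_name": "samsung_galaxy_s24",
    }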
-    with open(args.artifacts) as f:
-        for artifact in json.load(f):
-            app_type = artifact.get("app_type", "")
-            # We expect this to be set to either ANDROID_APP or IOS_APP
-            if not app_type or app_type not in ["ANDROID_APP", "IOS_APP"]:
-                info(
-                    f"App type {app_type} is not recognized in artifact {json.dumps(artifact)}"
-                )
-                continue
-
-            job_name = artifact["job_name"]
-            artifact_type = artifact["type"]
-            artifact_s3_url = artifact["s3_url"]
+def transform_failure_record(
+    app_type: str,
+    level: str,
+    model_name: str,
+    model_backend: str,
+    device_name: str,
+    device_os: str,
+    result: str,
+    report: Any = {},
+) -> Any:
+    """
+    Transform the benchmark results into the format writable into the benchmark database for job failures
+    """
+    # From https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
+    return {
+        "benchmark": {
+            "name": "ExecuTorch",
+            "mode": "inference",
+            "extra_info": {
+                "app_type": app_type,
+                "job_conclusion": result,
+                "failure_level": level,
+                "job_report": json.dumps(report),
+            },
+        },
+        "model": {
+            "name": model_name,
+            "type": "OSS model",
+            "backend": model_backend,
+        },
+        "metric": {
+            "name": "FAILURE_REPORT",
+            "benchmark_values": 0,
+            "target_value": 0,
+            "extra_info": {
+                "method": "",
+            },
+        },
+        "runners": [
+            {
+                "name": device_name,
+                "type": device_os,
+                "avail_mem_in_gb": "",
+                "total_mem_in_gb": "",
+            }
+        ],
+    }
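To make the record shape concrete, here is a minimal sketch of a GIT_JOB-level call; the argument values mirror the defaults used later in this diff:

    record = transform_failure_record(
        app_type="ANDROID_APP",
        level="GIT_JOB",
        model_name="ic4",
        model_backend="qnn_q8",
        device_name="samsung_galaxy_s24",
        device_os="Android",
        result="FAILURE",
    )
    # The FAILURE_REPORT metric with a zero value acts as a placeholder the
    # dashboard can key off; the real context lives in benchmark.extra_info.
    assert record["metric"]["name"] == "FAILURE_REPORT"
    assert record["benchmark"]["extra_info"]["failure_level"] == "GIT_JOB"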
-            if artifact_type == "TESTSPEC_OUTPUT":
-                benchmark_config = read_benchmark_config(
-                    artifact_s3_url, args.benchmark_configs
-                )
-            benchmark_results = []
-            if app_type == "ANDROID_APP":
-                benchmark_results = extract_android_benchmark_results(
-                    job_name, artifact_type, artifact_s3_url
-                )
-
-            if app_type == "IOS_APP":
-                benchmark_results = extract_ios_benchmark_results(
-                    job_name, artifact_type, artifact_s3_url
-                )
+def to_job_report_map(job_reports) -> Dict[str, Any]:
+    return {job_report["arn"]: job_report for job_report in job_reports}
+def group_by_arn(artifacts: List) -> Dict[str, List]:
+    """
+    Group the artifacts by the job ARN
+    """
+    arn_to_artifacts = DefaultDict(list)
+    for artifact in artifacts:
+        job_arn = artifact.get("job_arn", "")
+        app_type = artifact.get("app_type", "")
+        if not app_type or app_type not in ["ANDROID_APP", "IOS_APP"]:
+            info(
+                f"App type {app_type} is not recognized in artifact {json.dumps(artifact)}"
+            )
+            continue
+        if not job_arn:
+            info(f"missing job_arn in artifact {json.dumps(artifact)}")
+            continue
+        arn_to_artifacts[job_arn].append(artifact)
+    return arn_to_artifacts
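A small sketch of how these two helpers pair artifacts with their Device Farm job reports; the field values here are invented for illustration, though the keys match the ones the code reads:

    artifacts = [
        {"job_arn": "arn:job/1", "app_type": "ANDROID_APP", "type": "TESTSPEC_OUTPUT", "s3_url": "..."},
        {"job_arn": "arn:job/1", "app_type": "ANDROID_APP", "type": "CUSTOMER_ARTIFACT", "s3_url": "..."},
        {"app_type": "ANDROID_APP"},  # skipped: no job_arn
    ]
    job_reports = [{"arn": "arn:job/1", "name": "Galaxy S24", "os": "14", "result": "PASSED"}]

    by_arn = group_by_arn(artifacts)             # {"arn:job/1": [<two artifacts>]}
    report_map = to_job_report_map(job_reports)  # {"arn:job/1": {...}}
    assert len(by_arn["arn:job/1"]) == 2 and "arn:job/1" in report_map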
+# get the benchmark config from the TestSpec file, if one exists
+def get_benchmark_config(
+    artifacts: List[Dict[str, Any]], benchmark_configs: str
+) -> Dict[str, str]:
+    result = next(
+        (artifact for artifact in artifacts if artifact["type"] == "TESTSPEC_OUTPUT"),
+        None,
+    )
+    if not result:
+        return {}
+    artifact_s3_url = result["s3_url"]
+    return read_benchmark_config(artifact_s3_url, benchmark_configs)
+def extractBenchmarkResultFromArtifact(
+    artifact: Dict[str, Any],
+    benchmark_config: Dict[str, str],
+) -> List[Any]:
+    job_name = artifact.get("job_name", "")
+    artifact_type = artifact.get("type", "")
+    artifact_s3_url = artifact.get("s3_url", "")
+    app_type = artifact.get("app_type", "")
+
+    info(
+        f"Processing {app_type} artifact: {job_name} {artifact_type} {artifact_s3_url}"
+    )
+    benchmark_results = []
+    if app_type == "ANDROID_APP":
+        benchmark_results = extract_android_benchmark_results(
+            artifact_type, artifact_s3_url
+        )
+    if app_type == "IOS_APP":
+        benchmark_results = extract_ios_benchmark_results(
+            artifact_type, artifact_s3_url
+        )
+    if not benchmark_results:
+        return []
+    return transform(app_type, benchmark_results, benchmark_config, job_name)
+def getAppType(type: str):
+    match type:
+        case "ios":
+            return "IOS_APP"
+        case "android":
+            return "ANDROID_APP"
+    warning(
+        f"unknown device type detected: {type}, currently we only support ios and android"
+    )
+    return "UNKNOWN"
+
+
+def getDeviceOsType(type: str):
+    match type:
+        case "ios":
+            return "iOS"
+        case "android":
+            return "Android"
+    return "UNKNOWN"
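Note that both helpers use structural pattern matching (match/case), so the script assumes Python 3.10+. A quick check of the mapping as written:

    assert getAppType("ios") == "IOS_APP"
    assert getDeviceOsType("android") == "Android"
    assert getAppType("windows") == "UNKNOWN"  # logs a warning first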
+def generateGitJobLevelFailureRecord(git_job_name: str, app: str) -> Any:
+    """
+    Generates a benchmark record for a GIT_JOB-level failure; this is mainly used as a placeholder in the UI to indicate job failures.
+    """
+    level = "GIT_JOB"
+    app_type = getAppType(app)
+    device_prefix = getDeviceOsType(app)
+
+    model_infos = extract_model_info(git_job_name)
+    model_name = "UNKNOWN"
+    model_backend = "UNKNOWN"
+    device_pool_name = "UNKNOWN"
+
+    if model_infos:
+        model_name = model_infos["model_name"]
+        model_backend = model_infos["model_backend"]
+        device_pool_name = model_infos["device_pool_name"]
+    return transform_failure_record(
+        app_type,
+        level,
+        model_name,
+        model_backend,
+        device_pool_name,
+        device_prefix,
+        "FAILURE",
+    )

-            if benchmark_results:
-                results = transform(
-                    app_type, benchmark_results, benchmark_config, job_name
+def generateDeviceLevelFailureRecord(
+    git_job_name: str, job_report: Any, app: str
+) -> Any:
+    """
+    Generates a benchmark record for a DEVICE_JOB-level failure; this is mainly used as a placeholder in the UI to indicate job failures.
+    """
+    level = "DEVICE_JOB"
+    model_infos = extract_model_info(git_job_name)
+    model_name = "UNKNOWN"
+    model_backend = "UNKNOWN"
+    osPrefix = getDeviceOsType(app)
+    job_report_os = job_report["os"]
+
+    # make sure the device os name has the prefix iOS or Android
+    device_os = job_report_os
+    if not job_report_os.startswith(osPrefix):
+        device_os = f"{osPrefix} {job_report_os}"
+
+    if model_infos:
+        model_name = model_infos["model_name"]
+        model_backend = model_infos["model_backend"]
+    return transform_failure_record(
+        job_report["app_type"],
+        level,
+        model_name,
+        model_backend,
+        job_report["name"],
+        device_os,
+        job_report["result"],
+        job_report,
+    )
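Judging from the fields accessed here and in process_benchmark_results below, a job_report looks roughly like the following; this is a hedged sketch with invented values, not a documented AWS schema:

    job_report = {
        "arn": "arn:aws:devicefarm:us-west-2:...:job/...",  # joins artifacts to reports
        "name": "Apple iPhone 15",  # device name, becomes runners[0].name
        "os": "17.0",               # gets the "iOS"/"Android" prefix added above
        "app_type": "IOS_APP",
        "result": "FAILED",         # anything but "PASSED" triggers a failure record
    }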
+def process_benchmark_results(content: Any, app: str, benchmark_configs: str):
+    """
+    Main entry point to extract benchmark results from artifacts.
+    A job can fail at two levels: GIT_JOB and DEVICE_JOB. If any job fails, generate a failure benchmark record.
+    """
+    artifacts = content.get("artifacts")
+    git_job_name = content["git_job_name"]

    [Review thread, resolved]
    huydhn: A word of caution when trying to extract the information about the run from the job name. The job name comes from this line https://github.com/pytorch/executorch/blob/main/.ci/scripts/gather_benchmark_configs.py#L335. So, I think: …
    yangw-dev: Sounds good! I also raise an exception for get_app_type and get_device_os_type. @huydhn I also added a comment in perf.yml for the job-name step change too.
+    # this indicates that the git job failed; generate a failure record
+    if not artifacts:
+        info(f"job failed at GIT_JOB level with git job name {git_job_name}")
+        return [generateGitJobLevelFailureRecord(git_job_name, app)]
+
+    arn_to_artifacts = group_by_arn(artifacts)
+    job_reports = content["job_reports"]
+    arn_to_job_report = to_job_report_map(job_reports)
+
+    all_benchmark_results = []
+
+    # process each mobile job's benchmark results. Each job represents one device+os in the device pool
+    for job_arn, job_artifacts in arn_to_artifacts.items():
+        job_report = arn_to_job_report.get(job_arn)
+
+        if not job_report:
+            info(
+                f"job arn {job_arn} is not recognized in job_reports list {json.dumps(job_reports)}, skipping"
+            )
+            continue
+
+        result = job_report.get("result", "")
+        if result != "PASSED":
+            arn = job_report["arn"]
+            info(f"job {arn} failed at DEVICE_JOB level with result {result}")
+            # device test failed, generate a failure record instead
+            all_benchmark_results.append(
+                generateDeviceLevelFailureRecord(git_job_name, job_report, app)
+            )
+        else:
+            benchmark_config = get_benchmark_config(job_artifacts, benchmark_configs)
+            for job_artifact in job_artifacts:
+                # generate a result for each schema
+                results = extractBenchmarkResultFromArtifact(
+                    job_artifact, benchmark_config
+                )
+                all_benchmark_results.extend(results)
+    return all_benchmark_results
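Per the review thread above, the author says later commits rename the helpers to snake_case and raise instead of returning "UNKNOWN". A sketch of what that might look like, not the final committed code:

    def get_app_type(app: str) -> str:
        match app:
            case "ios":
                return "IOS_APP"
            case "android":
                return "ANDROID_APP"
        # Fail fast instead of emitting an "UNKNOWN" placeholder record
        raise ValueError(f"unknown app type: {app}, only ios and android are supported")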
-    # add v3 in case we have higher version of schema
-    output_dir = os.path.join(args.output_dir, "v3")
-    os.makedirs(output_dir, exist_ok=True)
-    output_file = os.path.basename(args.artifacts)
-    with open(f"{output_dir}/{output_file}", "w") as f:
-        json.dump(all_benchmark_results, f)
+def main() -> None:
+    args = parse_args()
+    with open(args.artifacts) as f:
+        content = json.load(f)
+    all_benchmark_results = process_benchmark_results(
+        content, args.app, args.benchmark_configs
+    )
+    # add v3 in case we have a higher version of the schema
+    output_dir = os.path.join(args.output_dir, "v3")
+    os.makedirs(output_dir, exist_ok=True)
+    output_file = os.path.basename(args.artifacts)
+    with open(f"{output_dir}/{output_file}", "w") as f:
+        json.dump(all_benchmark_results, f)
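The output path mirrors the input file name under a v3/ subdirectory; a quick trace with hypothetical argument values:

    # args.artifacts = "/tmp/artifacts.json", args.output_dir = "out"
    output_dir = os.path.join("out", "v3")                 # "out/v3"
    output_file = os.path.basename("/tmp/artifacts.json")  # "artifacts.json"
    # records are written to "out/v3/artifacts.json"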
 if __name__ == "__main__":
     main()