|
14 | 14 | from argparse import Action, ArgumentParser, Namespace |
15 | 15 | from io import BytesIO |
16 | 16 | from logging import info, warning |
17 | | -from typing import Any, Dict, List, Optional |
| 17 | +from collections import defaultdict |
| 18 | +from typing import Any, Dict, List, Optional |
18 | 18 | from urllib import error, request |
19 | 19 |
|
20 | 20 |
|
@@ -94,12 +94,18 @@ def parse_args() -> Any: |
94 | 94 | help="the directory to keep the benchmark configs", |
95 | 95 | ) |
96 | 96 |
|
| 97 | + parser.add_argument( |
| 98 | + "--app", |
| 99 | + type=str, |
| 100 | + required=True, |
| 101 | + choices=["ios", "android"], |
| 102 | + help="the app type, ios or android; mainly used to generate a default failure record when a job fails", |
| 103 | + ) |
| 104 | + |
97 | 105 | return parser.parse_args() |
98 | 106 |
|
99 | 107 |
|
100 | | -def extract_android_benchmark_results( |
101 | | - job_name: str, artifact_type: str, artifact_s3_url: str |
102 | | -) -> List: |
| 108 | +def extract_android_benchmark_results(artifact_type: str, artifact_s3_url: str) -> List: |
103 | 109 | """ |
104 | 110 | The benchmark results from Android have already been stored in CUSTOMER_ARTIFACT |
105 | 111 | artifact, so we will just need to get it |
@@ -220,9 +226,7 @@ def extract_ios_metric( |
220 | 226 | return benchmark_result |
221 | 227 |
|
222 | 228 |
|
223 | | -def extract_ios_benchmark_results( |
224 | | - job_name: str, artifact_type: str, artifact_s3_url: str |
225 | | -) -> List: |
| 229 | +def extract_ios_benchmark_results(artifact_type: str, artifact_s3_url: str) -> List: |
226 | 230 | """ |
227 | 231 | The benchmark results from iOS are currently from xcresult, which could either |
228 | 232 | be parsed from CUSTOMER_ARTIFACT or get from the test spec output. The latter |
@@ -385,54 +389,271 @@ def transform( |
385 | 389 | ] |
386 | 390 |
|
387 | 391 |
|
388 | | -def main() -> None: |
389 | | - args = parse_args() |
| 392 | +def get_model_info(git_job_name: str) -> Optional[Dict[str, str]]: |
| 393 | + """ |
| 394 | + Extract the model name, backend, and device pool name from the git job name. |
| 395 | + The git job name is currently in the format "benchmark-on-device (ic4, xnnpack_q8, samsung_galaxy_s22, arn:.." |
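| 396 | + |
| 397 | + For example, the sample name above would yield (values illustrative): |
| 398 | + {"model_name": "ic4", "model_backend": "xnnpack_q8", "device_pool_name": "samsung_galaxy_s22"} |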
| 396 | + """ |
| 397 | + # Extract the content after the first opening parenthesis. |
| 398 | + |
| 399 | + pattern = r"benchmark-on-device \((.+)" |
| 400 | + match = re.search(pattern, git_job_name) |
| 401 | + if not match: |
| 402 | + warning( |
| 403 | + f"pattern not found from git_job_name {git_job_name}, cannot extract correct names" |
| 404 | + ) |
| 405 | + return None |
| 406 | + |
| 407 | + extracted_content = match.group(1) # Get content after the opening parenthesis |
| 408 | + items = extracted_content.split(",") |
| 409 | + if len(items) < 3: |
| 410 | + warning( |
| 411 | + f"expect at least 3 items extrac from git_job_name {git_job_name}, but got {items}" |
| 412 | + ) |
| 413 | + return None |
390 | 414 |
|
391 | | - # Across all devices, keeping both schemas for now until ExecuTorch dashboard migrates to v3 |
392 | | - all_benchmark_results = [] |
393 | | - benchmark_config = {} |
| 415 | + return { |
| 416 | + "model_name": items[0].strip(), |
| 417 | + "model_backend": items[1].strip(), |
| 418 | + "device_pool_name": items[2].strip(), |
| 419 | + } |
394 | 420 |
|
395 | | - with open(args.artifacts) as f: |
396 | | - for artifact in json.load(f): |
397 | | - app_type = artifact.get("app_type", "") |
398 | | - # We expect this to be set to either ANDROID_APP or IOS_APP |
399 | | - if not app_type or app_type not in ["ANDROID_APP", "IOS_APP"]: |
400 | | - info( |
401 | | - f"App type {app_type} is not recognized in artifact {json.dumps(artifact)}" |
402 | | - ) |
403 | | - continue |
404 | 421 |
|
405 | | - job_name = artifact["job_name"] |
406 | | - artifact_type = artifact["type"] |
407 | | - artifact_s3_url = artifact["s3_url"] |
| 422 | +def transform_failure_record( |
| 423 | + app_type: str, |
| 424 | + level: str, |
| 425 | + model_name: str, |
| 426 | + model_backend: str, |
| 427 | + device_name: str, |
| 428 | + device_os: str, |
| 429 | + result: str, |
| 430 | + report: Optional[Dict[str, Any]] = None, |
| 431 | +) -> Any: |
| 432 | + """ |
| 433 | + Generate a failure record in the format writable to the benchmark database. |
| 434 | + """ |
| 435 | + # From https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database |
| 436 | + return { |
| 437 | + "benchmark": { |
| 438 | + "name": "ExecuTorch", |
| 439 | + "mode": "inference", |
| 440 | + "extra_info": { |
| 441 | + "app_type": app_type, |
| 442 | + "job_conclusion": result, |
| 443 | + "failure_level": level, |
| 444 | + "job_report": json.dumps(report), |
| 445 | + }, |
| 446 | + }, |
| 447 | + "model": { |
| 448 | + "name": model_name, |
| 449 | + "type": "OSS model", |
| 450 | + "backend": model_backend, |
| 451 | + }, |
| 452 | + "metric": { |
| 453 | + "name": "FAILURE_REPORT", |
| 454 | + "benchmark_values": 0, |
| 455 | + "target_value": 0, |
| 456 | + "extra_info": { |
| 457 | + "method": "", |
| 458 | + }, |
| 459 | + }, |
| 460 | + "runners": [ |
| 461 | + { |
| 462 | + "name": device_name, |
| 463 | + "type": device_os, |
| 464 | + "avail_mem_in_gb": "", |
| 465 | + "total_mem_in_gb": "", |
| 466 | + } |
| 467 | + ], |
| 468 | + } |
408 | 469 |
|
409 | | - if artifact_type == "TESTSPEC_OUTPUT": |
410 | | - benchmark_config = read_benchmark_config( |
411 | | - artifact_s3_url, args.benchmark_configs |
412 | | - ) |
413 | | - benchmark_results = [] |
414 | | - if app_type == "ANDROID_APP": |
415 | | - benchmark_results = extract_android_benchmark_results( |
416 | | - job_name, artifact_type, artifact_s3_url |
417 | | - ) |
418 | 470 |
|
419 | | - if app_type == "IOS_APP": |
420 | | - benchmark_results = extract_ios_benchmark_results( |
421 | | - job_name, artifact_type, artifact_s3_url |
422 | | - ) |
| 471 | +def to_job_report_map(job_reports: List[Dict[str, Any]]) -> Dict[str, Any]: |
| 472 | + return {job_report["arn"]: job_report for job_report in job_reports} |
| 473 | + |
423 | 474 |
|
424 | | - if benchmark_results: |
425 | | - results = transform( |
426 | | - app_type, benchmark_results, benchmark_config, job_name |
| 475 | +def group_by_arn(artifacts: List) -> Dict[str, List]: |
| 476 | + """ |
| 477 | + Group the artifacts by the job ARN. |
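| 478 | + |
| 479 | + For example (hypothetical ARNs, minimal artifacts): |
| 480 | + [{"job_arn": "arn:1", "app_type": "IOS_APP"}, {"job_arn": "arn:1", "app_type": "IOS_APP"}] |
| 481 | + -> {"arn:1": [<both artifacts>]} |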
| 478 | + """ |
| 479 | + arn_to_artifacts = defaultdict(list) |
| 480 | + for artifact in artifacts: |
| 481 | + job_arn = artifact.get("job_arn", "") |
| 482 | + app_type = artifact.get("app_type", "") |
| 483 | + if not app_type or app_type not in ["ANDROID_APP", "IOS_APP"]: |
| 484 | + info( |
| 485 | + f"App type {app_type} is not recognized in artifact {json.dumps(artifact)}" |
| 486 | + ) |
| 487 | + continue |
| 488 | + if not job_arn: |
| 489 | + info(f"missing job_arn in artifact {json.dumps(artifact)}") |
| 490 | + continue |
| 491 | + arn_to_artifacts[job_arn].append(artifact) |
| 492 | + return arn_to_artifacts |
| 493 | + |
| 494 | + |
| 495 | +# Get the benchmark config from the TESTSPEC_OUTPUT artifact, if one exists. |
| 496 | +def get_benchmark_config( |
| 497 | + artifacts: List[Dict[str, Any]], benchmark_configs: str |
| 498 | +) -> Dict[str, str]: |
| 499 | + result = next( |
| 500 | + (artifact for artifact in artifacts if artifact["type"] == "TESTSPEC_OUTPUT"), |
| 501 | + None, |
| 502 | + ) |
| 503 | + if not result: |
| 504 | + return {} |
| 505 | + artifact_s3_url = result["s3_url"] |
| 506 | + return read_benchmark_config(artifact_s3_url, benchmark_configs) |
| 507 | + |
| 508 | + |
| 509 | +def extract_benchmark_result_from_artifact( |
| 510 | + artifact: Dict[str, Any], |
| 511 | + benchmark_config: Dict[str, str], |
| 512 | +) -> List[Any]: |
| 513 | + job_name = artifact.get("job_name", "") |
| 514 | + artifact_type = artifact.get("type", "") |
| 515 | + artifact_s3_url = artifact.get("s3_url", "") |
| 516 | + app_type = artifact.get("app_type", "") |
| 517 | + |
| 518 | + info( |
| 519 | + f"Processing {app_type} artifact: {job_name} {artifact_type} {artifact_s3_url}" |
| 520 | + ) |
| 521 | + benchmark_results = [] |
| 522 | + if app_type == "ANDROID_APP": |
| 523 | + benchmark_results = extract_android_benchmark_results( |
| 524 | + artifact_type, artifact_s3_url |
| 525 | + ) |
| 526 | + if app_type == "IOS_APP": |
| 527 | + benchmark_results = extract_ios_benchmark_results( |
| 528 | + artifact_type, artifact_s3_url |
| 529 | + ) |
| 530 | + if not benchmark_results: |
| 531 | + return [] |
| 532 | + return transform(app_type, benchmark_results, benchmark_config, job_name) |
| 533 | + |
| 534 | + |
| 535 | +def get_app_type(app: str) -> str: |
| 536 | + match app: |
| 537 | + case "ios": |
| 538 | + return "IOS_APP" |
| 539 | + case "android": |
| 540 | + return "ANDROID_APP" |
| 541 | + warning( |
| 542 | + f"unknown app type detected: {app}; currently only ios and android are supported" |
| 543 | + ) |
| 544 | + return "UNKNOWN" |
| 545 | + |
| 546 | + |
| 547 | +def generate_git_job_level_failure_record(git_job_name: str, app: str) -> Any: |
| 548 | + """ |
| 549 | + Generate a benchmark record for a GIT_JOB-level failure, mainly used as a placeholder in the UI to indicate the job failure. |
| 550 | + """ |
| 551 | + level = "GIT_JOB" |
| 552 | + app_type = get_app_type(app) |
| 553 | + model_infos = get_model_info(git_job_name) |
| 554 | + model_name = "UNKNOWN" |
| 555 | + model_backend = "UNKNOWN" |
| 556 | + device_pool_name = "UNKNOWN" |
| 557 | + |
| 558 | + if model_infos: |
| 559 | + model_name = model_infos["model_name"] |
| 560 | + model_backend = model_infos["model_backend"] |
| 561 | + device_pool_name = model_infos["device_pool_name"] |
| 562 | + return transform_failure_record( |
| 563 | + app_type, |
| 564 | + level, |
| 565 | + model_name, |
| 566 | + model_backend, |
| 567 | + device_pool_name, |
| 568 | + "UNKNOWN", |
| 569 | + "FAILURE", |
| 570 | + ) |
| 571 | + |
| 572 | + |
| 573 | +def generate_device_level_failure_record(git_job_name: str, job_report: Any) -> Any: |
| 574 | + """ |
| 575 | + Generate a benchmark record for a DEVICE_JOB-level failure, mainly used as a placeholder in the UI to indicate the job failure. |
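| 576 | + |
| 577 | + The job_report is expected to carry at least these keys (values illustrative): |
| 578 | + {"arn": "...", "name": "Samsung Galaxy S22", "os": "Android 13", "result": "FAILED", "app_type": "ANDROID_APP"} |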
| 576 | + """ |
| 577 | + level = "DEVICE_JOB" |
| 578 | + model_infos = get_model_info(git_job_name) |
| 579 | + model_name = "UNKNOWN" |
| 580 | + model_backend = "UNKNOWN" |
| 581 | + if model_infos: |
| 582 | + model_name = model_infos["model_name"] |
| 583 | + model_backend = model_infos["model_backend"] |
| 584 | + return transform_failure_record( |
| 585 | + job_report["app_type"], |
| 586 | + level, |
| 587 | + model_name, |
| 588 | + model_backend, |
| 589 | + job_report["name"], |
| 590 | + job_report["os"], |
| 591 | + job_report["result"], |
| 592 | + job_report, |
| 593 | + ) |
| 594 | + |
| 595 | + |
| 596 | +def process_benchmark_results(content: Any, app: str, benchmark_configs: str) -> List[Any]: |
| 597 | + """ |
| 598 | + Main entry point for extracting benchmark results from artifacts. |
| 599 | + A job can fail at two levels: GIT_JOB and DEVICE_JOB. If a job fails, a failure benchmark record is generated instead. |
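| 600 | + |
| 601 | + The expected shape of "content", inferred from the fields accessed below (values illustrative): |
| 602 | + { |
| 603 | + "git_job_name": "benchmark-on-device (...)", |
| 604 | + "artifacts": [{"job_arn": "...", "job_name": "...", "app_type": "ANDROID_APP", "type": "TESTSPEC_OUTPUT", "s3_url": "..."}], |
| 605 | + "job_reports": [{"arn": "...", "name": "...", "os": "...", "result": "PASSED", "app_type": "..."}], |
| 606 | + } |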
| 600 | + """ |
| 601 | + artifacts = content.get("artifacts") |
| 602 | + git_job_name = content["git_job_name"] |
| 603 | + |
| 604 | + # The git job failed before producing any artifacts; generate a failure record. |
| 605 | + if not artifacts: |
| 606 | + return [generate_git_job_level_failure_record(git_job_name, app)] |
| 607 | + |
| 608 | + arn_to_artifacts = group_by_arn(artifacts) |
| 609 | + job_reports = content["job_reports"] |
| 610 | + arn_to_job_report = to_job_report_map(job_reports) |
| 611 | + |
| 612 | + all_benchmark_results = [] |
| 613 | + |
| 614 | + # Process each mobile job's benchmark results. Each job represents one device + OS in the device pool. |
| 615 | + for job_arn, job_artifacts in arn_to_artifacts.items(): |
| 616 | + job_report = arn_to_job_report.get(job_arn) |
| 617 | + |
| 618 | + if not job_report: |
| 619 | + info( |
| 620 | + f"job arn {job_arn} is not recognized in job_reports list {json.dumps(job_reports)}, skip the process" |
| 621 | + ) |
| 622 | + continue |
| 623 | + |
| 624 | + result = job_report.get("result", "") |
| 625 | + if result != "PASSED": |
| 626 | + info( |
| 627 | + f"job {job_report.get("arn")} failed at DEVICE_JOB level with result {result}" |
| 628 | + ) |
| 629 | + # The device test failed; generate a failure record instead. |
| 630 | + all_benchmark_results.append( |
| 631 | + generate_device_level_failure_record(git_job_name, job_report) |
| 632 | + ) |
| 633 | + else: |
| 634 | + benchmark_config = get_benchmark_config(job_artifacts, benchmark_configs) |
| 635 | + for job_artifact in job_artifacts: |
| 636 | + # Generate benchmark records from each artifact. |
| 637 | + results = extract_benchmark_result_from_artifact( |
| 638 | + job_artifact, benchmark_config |
427 | 639 | ) |
428 | 640 | all_benchmark_results.extend(results) |
| 641 | + return all_benchmark_results |
429 | 642 |
|
430 | | - # add v3 in case we have higher version of schema |
431 | | - output_dir = os.path.join(args.output_dir, "v3") |
432 | | - os.makedirs(output_dir, exist_ok=True) |
433 | | - output_file = os.path.basename(args.artifacts) |
434 | | - with open(f"{output_dir}/{output_file}", "w") as f: |
435 | | - json.dump(all_benchmark_results, f) |
| 643 | + |
| 644 | +def main() -> None: |
| 645 | + args = parse_args() |
| 646 | + with open(args.artifacts) as f: |
| 647 | + content = json.load(f) |
| 648 | + all_benchmark_results = process_benchmark_results( |
| 649 | + content, args.app, args.benchmark_configs |
| 650 | + ) |
| 651 | + # Write into a v3 subdirectory, in case a higher schema version is added later. |
| 652 | + output_dir = os.path.join(args.output_dir, "v3") |
| 653 | + os.makedirs(output_dir, exist_ok=True) |
| 654 | + output_file = os.path.basename(args.artifacts) |
| 655 | + with open(f"{output_dir}/{output_file}", "w") as f: |
| 656 | + json.dump(all_benchmark_results, f) |
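| 657 | + |
| 658 | + |
| 659 | +# Example invocation (a sketch; flag names other than --app are inferred from |
| 660 | +# the parsed args and may differ, and paths are illustrative): |
| 661 | +#   python extract_benchmark_results.py --artifacts artifacts.json \ |
| 662 | +#       --output-dir out --app android --benchmark-configs benchmark-configs |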
436 | 657 |
|
437 | 658 |
|
438 | 659 | if __name__ == "__main__": |
|