183 changes: 48 additions & 135 deletions .github/scripts/extract_benchmark_results.py
@@ -86,36 +86,6 @@ def parse_args() -> Any:
action=ValidateDir,
help="the directory to keep the benchmark results",
)
parser.add_argument(
"--repo",
type=str,
required=True,
help="which GitHub repo this workflow run belongs to",
)
parser.add_argument(
"--head-branch",
type=str,
required=True,
help="the head branch that runs",
)
parser.add_argument(
"--workflow-name",
type=str,
required=True,
help="the name of the benchmark workflow",
)
parser.add_argument(
"--workflow-run-id",
type=int,
required=True,
help="the id of the benchmark workflow",
)
parser.add_argument(
"--workflow-run-attempt",
type=int,
required=True,
help="which retry of the workflow this is",
)
parser.add_argument(
"--benchmark-configs",
type=str,
@@ -153,9 +123,10 @@ def extract_android_benchmark_results(
# This is to handle the case where there is no benchmark results
warning(f"Fail to load the benchmark results from {artifact_s3_url}")
return []
return []


def initialize_ios_metadata(test_name: str) -> Dict[str, any]:
def initialize_ios_metadata(test_name: str) -> Dict[str, Any]:
"""
Extract the benchmark metadata from the test name, for example:
test_forward_llama2_pte_iOS_17_2_1_iPhone15_4
Expand Down Expand Up @@ -364,14 +335,7 @@ def transform(
app_type: str,
benchmark_results: List,
benchmark_config: Dict[str, str],
repo: str,
head_branch: str,
workflow_name: str,
workflow_run_id: int,
workflow_run_attempt: int,
job_name: str,
job_id: int,
schema_version: str,
) -> List:
"""
Transform the benchmark results into the format writable into the benchmark database
@@ -381,87 +345,51 @@
for r in benchmark_results:
r["deviceInfo"]["device"] = job_name

if schema_version == "v2":
# TODO (huydhn): Clean up this branch after ExecuTorch dashboard migrates to v3
return [
{
# GH-info to identify where the benchmark is run
"repo": repo,
"head_branch": head_branch,
"workflow_id": workflow_run_id,
"run_attempt": workflow_run_attempt,
"job_id": job_id,
# The model
"name": f"{r['benchmarkModel']['name']} {r['benchmarkModel'].get('backend', '')}".strip(),
"dtype": (
r["benchmarkModel"]["quantization"]
if r["benchmarkModel"]["quantization"]
else "unknown"
),
# The metric value
"metric": r["metric"],
"actual": r["actualValue"],
"target": r["targetValue"],
# The device
"device": r["deviceInfo"]["device"],
"arch": r["deviceInfo"].get("os", ""),
# Not used here, just set it to something unique here
"filename": workflow_name,
"test_name": app_type,
"runner": job_name,
}
for r in benchmark_results
]
elif schema_version == "v3":
v3_benchmark_results = []
# From https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
return [
{
"benchmark": {
"name": "ExecuTorch",
"mode": "inference",
"extra_info": {
"app_type": app_type,
# Just keep a copy of the benchmark config here
"benchmark_config": json.dumps(benchmark_config),
},
},
"model": {
"name": benchmark_config.get("model", r["benchmarkModel"]["name"]),
"type": "OSS model",
"backend": benchmark_config.get(
"config", r["benchmarkModel"].get("backend", "")
),
# From https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
return [
{
"benchmark": {
"name": "ExecuTorch",
"mode": "inference",
"extra_info": {
"app_type": app_type,
# Just keep a copy of the benchmark config here
"benchmark_config": json.dumps(benchmark_config),
},
"metric": {
"name": r["metric"],
"benchmark_values": [r["actualValue"]],
"target_value": r["targetValue"],
"extra_info": {
"method": r.get("method", ""),
},
},
"model": {
"name": benchmark_config.get("model", r["benchmarkModel"]["name"]),
"type": "OSS model",
"backend": benchmark_config.get(
"config", r["benchmarkModel"].get("backend", "")
),
},
"metric": {
"name": r["metric"],
"benchmark_values": [r["actualValue"]],
"target_value": r["targetValue"],
"extra_info": {
"method": r.get("method", ""),
},
"runners": [
{
"name": r["deviceInfo"]["device"],
"type": r["deviceInfo"]["os"],
"avail_mem_in_gb": r["deviceInfo"].get("availMem", ""),
"total_mem_in_gb": r["deviceInfo"].get("totalMem", ""),
}
],
}
for r in benchmark_results
]
},
"runners": [
{
"name": r["deviceInfo"]["device"],
"type": r["deviceInfo"]["os"],
"avail_mem_in_gb": r["deviceInfo"].get("availMem", ""),
"total_mem_in_gb": r["deviceInfo"].get("totalMem", ""),
}
],
}
for r in benchmark_results
]


def main() -> None:
args = parse_args()

# Across all devices, keeping both schemas for now until ExecuTorch dashboard migrates to v3
all_benchmark_results = {
"v2": [],
"v3": [],
}
all_benchmark_results = []
benchmark_config = {}

with open(args.artifacts) as f:
@@ -482,7 +410,7 @@ def main() -> None:
benchmark_config = read_benchmark_config(
artifact_s3_url, args.benchmark_configs
)

benchmark_results = []
if app_type == "ANDROID_APP":
benchmark_results = extract_android_benchmark_results(
job_name, artifact_type, artifact_s3_url
@@ -494,32 +422,17 @@
)

if benchmark_results:
for schema in all_benchmark_results.keys():
results = transform(
app_type,
benchmark_results,
benchmark_config,
args.repo,
args.head_branch,
args.workflow_name,
args.workflow_run_id,
args.workflow_run_attempt,
job_name,
extract_job_id(args.artifacts),
schema,
)
all_benchmark_results[schema].extend(results)

for schema in all_benchmark_results.keys():
if not all_benchmark_results.get(schema):
continue

output_dir = os.path.join(args.output_dir, schema)
os.makedirs(output_dir, exist_ok=True)
results = transform(
app_type, benchmark_results, benchmark_config, job_name
)
all_benchmark_results.extend(results)

# add v3 in case we have higher version of schema
output_dir = os.path.join(args.output_dir, "v3")
os.makedirs(output_dir, exist_ok=True)
output_file = os.path.basename(args.artifacts)
with open(f"{output_dir}/{output_file}", "w") as f:
json.dump(all_benchmark_results[schema], f)
json.dump(all_benchmark_results, f)


if __name__ == "__main__":
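
For reference, here is a minimal sketch (not part of the diff) of the single v3 record shape that the simplified transform() now builds for each raw result. The sample_result and sample_config values below are made-up placeholders; only the key layout mirrors the code above.

import json

# Hypothetical raw result and config, shaped like the ones the script parses
sample_result = {
    "benchmarkModel": {"name": "llama2", "backend": "xnnpack", "quantization": "8da4w"},
    "metric": "token_per_sec",
    "actualValue": 12.3,
    "targetValue": 10.0,
    "deviceInfo": {"device": "apple_iphone_15", "os": "iOS 17.2.1", "availMem": "", "totalMem": ""},
}
sample_config = {"model": "llama2", "config": "xnnpack_q8"}

# One v3 record, mirroring the keys assembled in transform()
record = {
    "benchmark": {
        "name": "ExecuTorch",
        "mode": "inference",
        "extra_info": {"app_type": "IOS_APP", "benchmark_config": json.dumps(sample_config)},
    },
    "model": {
        "name": sample_config.get("model", sample_result["benchmarkModel"]["name"]),
        "type": "OSS model",
        "backend": sample_config.get("config", sample_result["benchmarkModel"].get("backend", "")),
    },
    "metric": {
        "name": sample_result["metric"],
        "benchmark_values": [sample_result["actualValue"]],
        "target_value": sample_result["targetValue"],
        "extra_info": {"method": sample_result.get("method", "")},
    },
    "runners": [
        {
            "name": sample_result["deviceInfo"]["device"],
            "type": sample_result["deviceInfo"]["os"],
            "avail_mem_in_gb": sample_result["deviceInfo"].get("availMem", ""),
            "total_mem_in_gb": sample_result["deviceInfo"].get("totalMem", ""),
        }
    ],
}

print(json.dumps(record, indent=2))
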
21 changes: 3 additions & 18 deletions .github/workflows/android-perf.yml
@@ -462,29 +462,14 @@ jobs:
${CONDA_RUN} python .github/scripts/extract_benchmark_results.py \
--artifacts "${ARTIFACTS_BY_JOB}" \
--output-dir benchmark-results \
--repo ${{ github.repository }} \
--head-branch ${{ github.head_ref || github.ref_name }} \
--workflow-name "${{ github.workflow }}" \
--workflow-run-id ${{ github.run_id }} \
--workflow-run-attempt ${{ github.run_attempt }} \
--benchmark-configs benchmark-configs
done

for SCHEMA in v2 v3; do
for BENCHMARK_RESULTS in benchmark-results/"${SCHEMA}"/*.json; do
cat "${BENCHMARK_RESULTS}"
echo
done
for BENCHMARK_RESULTS in benchmark-results/v3/*.json; do
cat "${BENCHMARK_RESULTS}"
echo
done

# TODO (huydhn): Remove v2 schema once the benchmark dashboard finishes the migration
- name: Upload the benchmark results (v2)
uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
with:
benchmark-results-dir: benchmark-results/v2
dry-run: false
schema-version: v2

- name: Upload the benchmark results (v3)
uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
with:
21 changes: 3 additions & 18 deletions .github/workflows/apple-perf.yml
@@ -521,29 +521,14 @@ jobs:
${CONDA_RUN} python .github/scripts/extract_benchmark_results.py \
--artifacts "${ARTIFACTS_BY_JOB}" \
--output-dir benchmark-results \
--repo ${{ github.repository }} \
--head-branch ${{ github.head_ref || github.ref_name }} \
--workflow-name "${{ github.workflow }}" \
--workflow-run-id ${{ github.run_id }} \
--workflow-run-attempt ${{ github.run_attempt }} \
--benchmark-configs benchmark-configs
done

for SCHEMA in v2 v3; do
for BENCHMARK_RESULTS in benchmark-results/"${SCHEMA}"/*.json; do
cat "${BENCHMARK_RESULTS}"
echo
done
for BENCHMARK_RESULTS in benchmark-results/v3/*.json; do
cat "${BENCHMARK_RESULTS}"
echo
done

# TODO (huydhn): Remove v2 schema once the benchmark dashboard finishes the migration
- name: Upload the benchmark results (v2)
uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
with:
benchmark-results-dir: benchmark-results/v2
dry-run: false
schema-version: v2

- name: Upload the benchmark results (v3)
uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
with:
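
As a usage note (again a sketch, not part of either workflow), the per-job JSON files written under benchmark-results/v3 can be inspected locally in the same spirit as the cat loops above. The directory layout is the one produced by extract_benchmark_results.py; everything else here is assumed for illustration.

import glob
import json

# Each file holds the flat list of v3 records written by extract_benchmark_results.py
for path in sorted(glob.glob("benchmark-results/v3/*.json")):
    with open(path) as f:
        records = json.load(f)
    print(f"{path}: {len(records)} record(s)")
    for record in records:
        metric = record["metric"]
        print(f"  {record['model']['name']}: {metric['name']} = {metric['benchmark_values']}")
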