
Commit ebceef3

Collated reports (#40080)
* Add initial collated reports script and job definition
* Provide commit hash for this run; also use the hash in the generated artifact name. JSON formatting
* Tidy
* Add option to upload collated reports to the HF Hub
* Add glob pattern for test report folders
* Fix glob
* Use machine_type as path filter instead of glob; include machine_type in the collated report
1 parent e78571f commit ebceef3

File tree

.github/workflows/collated_reports.yml
utils/collated_reports.py

2 files changed: +268 −0 lines changed

.github/workflows/collated_reports.yml

Lines changed: 49 additions & 0 deletions

@@ -0,0 +1,49 @@
name: CI collated reports

on:
  workflow_call:
    inputs:
      job:
        required: true
        type: string
      report_repo_id:
        required: true
        type: string
      machine_type:
        required: true
        type: string
      gpu_name:
        description: Name of the GPU used for the job. It's enough that the value contains the name of the GPU, e.g. "noise-h100-more-noise". Case insensitive.
        required: true
        type: string

jobs:
  collated_reports:
    name: Collated reports
    runs-on: ubuntu-22.04
    if: always()
    steps:
      - uses: actions/checkout@v4
      - uses: actions/download-artifact@v4

      - name: Collated reports
        shell: bash
        env:
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          CI_SHA: ${{ github.sha }}
          TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
        run: |
          pip install huggingface_hub
          python3 utils/collated_reports.py \
            --path /transformers/reports/ \
            --machine-type ${{ inputs.machine_type }} \
            --commit-hash ${{ env.CI_SHA }} \
            --job ${{ inputs.job }} \
            --report-repo-id ${{ inputs.report_repo_id }} \
            --gpu-name ${{ inputs.gpu_name }}

      - name: Upload collated reports
        uses: actions/upload-artifact@v4
        with:
          name: collated_reports_${{ env.CI_SHA }}.json
          # The script truncates the hash to 7 characters when naming the file,
          # so match it with a glob rather than the full ${{ env.CI_SHA }}.
          path: collated_reports_*.json
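Because the workflow is exposed through workflow_call, a CI workflow in the same repository could invoke it roughly as follows. This is a minimal sketch, assuming the file lives at .github/workflows/collated_reports.yml; the caller job name, report repo ID, and GPU value are illustrative:

jobs:
  collated_reports:
    uses: ./.github/workflows/collated_reports.yml
    with:
      job: run_models_gpu                   # illustrative caller job name
      report_repo_id: your-org/ci-reports   # illustrative dataset repo
      machine_type: single-gpu
      gpu_name: h100
    secrets: inherit  # forwards ACCESS_REPO_INFO_TOKEN and the upload token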

utils/collated_reports.py

Lines changed: 219 additions & 0 deletions
@@ -0,0 +1,219 @@
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import json
import subprocess
from dataclasses import dataclass
from pathlib import Path


DEFAULT_GPU_NAMES = ["mi300", "mi325", "mi355", "h100", "a10"]


def simplify_gpu_name(gpu_name: str, simplified_names: list[str]) -> str:
    # Return the single simplified name contained in gpu_name, if exactly one matches
    matches = []
    for simplified_name in simplified_names:
        if simplified_name in gpu_name:
            matches.append(simplified_name)
    if len(matches) == 1:
        return matches[0]
    return gpu_name


def parse_short_summary_line(line: str) -> tuple[str | None, int]:
    if line.startswith("PASSED"):
        return "passed", 1
    if line.startswith("FAILED"):
        return "failed", 1
    if line.startswith("SKIPPED"):
        # Skipped lines carry a bracketed count, e.g. "SKIPPED [3] ..."
        line = line.split("[", maxsplit=1)[1]
        line = line.split("]", maxsplit=1)[0]
        return "skipped", int(line)
    if line.startswith("ERROR"):
        return "error", 1
    return None, 0

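# Examples (illustrative, assuming pytest's standard short-summary format):
#   parse_short_summary_line("PASSED tests/models/bert/test_modeling_bert.py::BertModelTest::test_forward")
#     -> ("passed", 1)
#   parse_short_summary_line("SKIPPED [3] tests/test_modeling_common.py:42: requires CUDA")
#     -> ("skipped", 3)
#   parse_short_summary_line("short test summary info")
#     -> (None, 0)
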
def validate_path(p: str) -> Path:
    # Validate that the given path is an existing directory
    path = Path(p)
    assert path.is_dir(), f"Path {path} is not a directory"
    return path


def get_gpu_name(gpu_name: str | None) -> str:
    # Get GPU name if available
    if gpu_name is None:
        try:
            import torch

            gpu_name = torch.cuda.get_device_name()
        except Exception as e:
            print(f"Failed to get GPU name with {e}")
            gpu_name = "unknown"
    else:
        gpu_name = gpu_name.replace(" ", "_").lower()
        gpu_name = simplify_gpu_name(gpu_name, DEFAULT_GPU_NAMES)

    return gpu_name


def get_commit_hash(commit_hash: str | None) -> str:
    # Get commit hash if available, truncated to the short (7-character) form
    if commit_hash is None:
        try:
            commit_hash = subprocess.check_output(["git", "rev-parse", "HEAD"]).decode("utf-8").strip()
        except Exception as e:
            print(f"Failed to get commit hash with {e}")
            commit_hash = "unknown"

    return commit_hash[:7]

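# Examples (illustrative):
#   get_gpu_name("NVIDIA H100 80GB HBM3") -> "h100"
#     (normalized to "nvidia_h100_80gb_hbm3"; "h100" is the only DEFAULT_GPU_NAMES
#      entry it contains, so simplify_gpu_name reduces it)
#   get_commit_hash(None) -> short hash of HEAD, e.g. "ebceef3"
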
@dataclass
class Args:
    path: Path
    machine_type: str
    gpu_name: str
    commit_hash: str
    job: str | None
    report_repo_id: str | None


def get_arguments(args: argparse.Namespace) -> Args:
    # Normalize raw CLI arguments into a typed Args instance
    path = validate_path(args.path)
    machine_type = args.machine_type
    gpu_name = get_gpu_name(args.gpu_name)
    commit_hash = get_commit_hash(args.commit_hash)
    job = args.job
    report_repo_id = args.report_repo_id
    return Args(path, machine_type, gpu_name, commit_hash, job, report_repo_id)

def upload_collated_report(job: str, report_repo_id: str, filename: str):
    # Alternatively we can check for the existence of the collated_reports file
    # and upload in notification_service.py
    import os

    from get_previous_daily_ci import get_last_daily_ci_run
    from huggingface_hub import HfApi

    api = HfApi()

    # If it is not a scheduled run, upload the reports to a subfolder under `report_repo_folder`
    report_repo_subfolder = ""
    if os.getenv("GITHUB_EVENT_NAME") != "schedule":
        report_repo_subfolder = f"{os.getenv('GITHUB_RUN_NUMBER')}-{os.getenv('GITHUB_RUN_ID')}"
        report_repo_subfolder = f"runs/{report_repo_subfolder}"

    workflow_run = get_last_daily_ci_run(
        token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_run_id=os.getenv("GITHUB_RUN_ID")
    )
    workflow_run_created_time = workflow_run["created_at"]
    report_repo_folder = workflow_run_created_time.split("T")[0]

    if report_repo_subfolder:
        report_repo_folder = f"{report_repo_folder}/{report_repo_subfolder}"

    api.upload_file(
        path_or_fileobj=f"ci_results_{job}/{filename}",
        path_in_repo=f"{report_repo_folder}/ci_results_{job}/{filename}",
        repo_id=report_repo_id,
        repo_type="dataset",
        token=os.getenv("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN"),
    )

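# Illustrative example of the resulting layout in the dataset repo: a report for a
# manually triggered run created on 2025-08-12 would land at
#   2025-08-12/runs/123-4567890/ci_results_run_models_gpu/collated_reports_ebceef3.json
# while a scheduled run omits the runs/<run_number>-<run_id> subfolder.
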
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Post process models test reports.")
    parser.add_argument("--path", "-p", help="Path to the reports folder")
    parser.add_argument(
        "--machine-type", "-m", help="Process single or multi GPU results", choices=["single-gpu", "multi-gpu"]
    )
    parser.add_argument("--gpu-name", "-g", help="GPU name", default=None)
    parser.add_argument("--commit-hash", "-c", help="Commit hash", default=None)
    parser.add_argument("--job", "-j", help="Optional job name required for uploading reports", default=None)
    parser.add_argument(
        "--report-repo-id", "-r", help="Optional report repository ID required for uploading reports", default=None
    )
    args = get_arguments(parser.parse_args())

    # Initialize accumulators for collated report
    total_status_count = {
        "passed": 0,
        "failed": 0,
        "skipped": 0,
        "error": 0,
        None: 0,
    }
    collated_report_buffer = []

    path = args.path
    machine_type = args.machine_type
    gpu_name = args.gpu_name
    commit_hash = args.commit_hash
    job = args.job
    report_repo_id = args.report_repo_id

    # Find the origin directory based on machine type
    origin = path
    for p in path.iterdir():
        if machine_type in p.name:
            origin = p
            break

    # Loop through model directories and create collated reports
    for model_dir in sorted(origin.iterdir()):
        # Create a new entry for the model
        model_name = model_dir.name.removesuffix("_test_reports")
        report = {"model": model_name, "results": []}
        results = []

        # Read short summary
        with open(model_dir / "summary_short.txt", "r") as f:
            short_summary_lines = f.readlines()

        # Parse short summary
        for line in short_summary_lines[1:]:
            status, count = parse_short_summary_line(line)
            total_status_count[status] += count
            if status:
                result = {
                    "status": status,
                    "test": line.split(status.upper(), maxsplit=1)[1].strip(),
                    "count": count,
                }
                results.append(result)

        # Add short summaries to report
        report["results"] = results

        collated_report_buffer.append(report)

    # Write collated report
    with open(f"collated_reports_{commit_hash}.json", "w") as f:
        json.dump(
            {
                "gpu_name": gpu_name,
                "machine_type": machine_type,
                "commit_hash": commit_hash,
                "total_status_count": total_status_count,
                "results": collated_report_buffer,
            },
            f,
            indent=2,
        )

    if job and report_repo_id:
        upload_collated_report(job, report_repo_id, f"collated_reports_{commit_hash}.json")
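For reference, the JSON artifact written by the script has roughly the shape below. This is a sketch assembled from the json.dump call above; the model names and counts are illustrative, and note that the Python None status key serializes as "null":

{
  "gpu_name": "h100",
  "machine_type": "single-gpu",
  "commit_hash": "ebceef3",
  "total_status_count": {
    "passed": 1204,
    "failed": 2,
    "skipped": 87,
    "error": 0,
    "null": 3
  },
  "results": [
    {
      "model": "bert",
      "results": [
        {
          "status": "failed",
          "test": "tests/models/bert/test_modeling_bert.py::BertModelTest::test_forward",
          "count": 1
        }
      ]
    }
  ]
}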
