Skip to content
39 changes: 39 additions & 0 deletions .github/workflows/collated-reports.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
name: CI collated reports

on:
  workflow_call:
    inputs:
      job:
        required: true
        type: string
      report_repo_id:
        required: true
        type: string
      machine_type:
        required: true
        type: string
      # Optional: forwarded to collated_reports.py as --gpu-name. The
      # collector job runs on a CPU-only ubuntu runner, so torch-based
      # auto-detection inside the script would otherwise report "unknown".
      gpu_name:
        required: false
        type: string
        default: ""

jobs:
  collated_reports:
    name: Collated reports
    runs-on: ubuntu-22.04
    if: always()
    steps:
      - uses: actions/checkout@v4
      - uses: actions/download-artifact@v4

      - name: Collated reports
        shell: bash
        env:
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          CI_SHA: ${{ github.sha }}
          TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
        run: |
          pip install huggingface_hub
          # Only pass --gpu-name when the caller supplied one; an empty
          # value must fall through to the script's own detection/fallback.
          extra_args=""
          if [ -n "${{ inputs.gpu_name }}" ]; then
            extra_args="--gpu-name ${{ inputs.gpu_name }}"
          fi
          # Quote the glob so bash does not expand it before Python sees it.
          python3 utils/collated_reports.py \
            --path /transformers/reports/ \
            --glob "${{ inputs.machine_type }}*" \
            --commit-hash "${{ env.CI_SHA }}" \
            --job "${{ inputs.job }}" \
            --report-repo-id "${{ inputs.report_repo_id }}" \
            $extra_args

      - name: Upload collated reports
        uses: actions/upload-artifact@v4
        with:
          name: collated_reports_${{ env.CI_SHA }}.json
          path: collated_reports_${{ env.CI_SHA }}.json
192 changes: 192 additions & 0 deletions utils/collated_reports.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import json
import subprocess
from pathlib import Path


DEFAULT_GPU_NAMES = ["mi300", "mi355", "h100", "a10"]


def simplify_gpu_name(gpu_name: str, simplified_names: list[str]) -> str:
    """Collapse a raw GPU name onto a canonical short name.

    Returns the single entry of ``simplified_names`` that occurs as a
    substring of ``gpu_name``. When zero or several entries match, the
    raw name is returned unchanged (ambiguity is left visible).
    """
    hits = [short for short in simplified_names if short in gpu_name]
    return hits[0] if len(hits) == 1 else gpu_name


def parse_short_summary_line(line: str) -> tuple[str | None, int]:
if line.startswith("PASSED"):
return "passed", 1
if line.startswith("FAILED"):
return "failed", 1
if line.startswith("SKIPPED"):
line = line.split("[", maxsplit=1)[1]
line = line.split("]", maxsplit=1)[0]
return "skipped", int(line)
if line.startswith("ERROR"):
return "error", 1
return None, 0


def get_paths(p: str, glob_pattern: str | None) -> list[Path]:
# Validate path and apply glob pattern if provided
path = Path(p)
assert path.is_dir(), f"Path {path} is not a directory"
if glob_pattern is None:
return [path]

return [p for p in path.glob(glob_pattern) if p.is_dir()]


def get_gpu_name(gpu_name: str | None) -> str:
    """Return a normalized GPU name, auto-detecting via torch when absent.

    The name is lower-cased, spaces become underscores, and it is
    collapsed to a canonical short name (e.g. "a10") when exactly one
    entry of DEFAULT_GPU_NAMES matches. Falls back to "unknown" when
    detection fails (e.g. on a CPU-only runner, where torch is missing
    or CUDA is unavailable). The argument is optional rather than
    mandatory to ease manual debugging on a GPU machine.
    """
    if gpu_name is None:
        try:
            import torch

            gpu_name = torch.cuda.get_device_name()
        except Exception as e:
            print(f"Failed to get GPU name with {e}")
            return "unknown"
    # Normalize in BOTH branches: previously only a user-supplied name
    # was normalized, so torch-detected names such as "NVIDIA A10G"
    # could never match the lowercase short names in DEFAULT_GPU_NAMES.
    gpu_name = gpu_name.replace(" ", "_").lower()
    return simplify_gpu_name(gpu_name, DEFAULT_GPU_NAMES)


def get_commit_hash(commit_hash: str | None) -> str:
# Get commit hash if available
if commit_hash is None:
try:
commit_hash = subprocess.check_output(["git", "rev-parse", "HEAD"]).decode("utf-8").strip()
except Exception as e:
print(f"Failed to get commit hash with {e}")
commit_hash = "unknown"

return commit_hash[:7]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

might be easier to make the argument mandatory?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above. It's just to make debugging easier when you're logged into the machine.



def get_arguments(args: argparse.Namespace) -> tuple[list[Path], str, str, str, str]:
    """Resolve parsed CLI arguments into the values the report builder needs."""
    return (
        get_paths(args.path, args.glob),
        get_gpu_name(args.gpu_name),
        get_commit_hash(args.commit_hash),
        args.job,
        args.report_repo_id,
    )


def upload_collated_report(job: str, report_repo_id: str, filename: str):
    """Upload the collated report JSON to the CI results dataset repo.

    Args:
        job: CI job name; reports are stored under ``ci_results_{job}/``.
        report_repo_id: Hub dataset repository id to upload to.
        filename: name of the collated report file to upload.
    """
    # Alternatively we can check for the existence of the collated_reports file and upload in notification_service.py
    import os

    from get_previous_daily_ci import get_last_daily_ci_run
    from huggingface_hub import HfApi

    api = HfApi()

    # if it is not a scheduled run, upload the reports to a subfolder under `report_repo_folder`
    report_repo_subfolder = ""
    if os.getenv("GITHUB_EVENT_NAME") != "schedule":
        report_repo_subfolder = f"{os.getenv('GITHUB_RUN_NUMBER')}-{os.getenv('GITHUB_RUN_ID')}"
        report_repo_subfolder = f"runs/{report_repo_subfolder}"

    workflow_run = get_last_daily_ci_run(
        token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_run_id=os.getenv("GITHUB_RUN_ID")
    )
    workflow_run_created_time = workflow_run["created_at"]
    # Reports are grouped by the run's creation date (the part before "T").
    report_repo_folder = workflow_run_created_time.split("T")[0]

    if report_repo_subfolder:
        report_repo_folder = f"{report_repo_folder}/{report_repo_subfolder}"

    # Bug fix: the `filename` argument was previously ignored and a
    # placeholder path was uploaded; use the actual report file name.
    # NOTE(review): __main__ writes the report into the CWD — confirm it
    # is moved into ci_results_{job}/ before this upload runs.
    api.upload_file(
        path_or_fileobj=f"ci_results_{job}/{filename}",
        path_in_repo=f"{report_repo_folder}/ci_results_{job}/{filename}",
        repo_id=report_repo_id,
        repo_type="dataset",
        token=os.getenv("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN"),
    )


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Post process models test reports.")
    parser.add_argument("--path", "-p", help="Path to the reports folder")
    # Bug fix: "--glob" previously reused the short flag "-p", which
    # collides with "--path" and makes argparse raise at startup.
    parser.add_argument("--glob", help="Glob pattern to access test reports folders", default=None)
    parser.add_argument("--gpu-name", "-g", help="GPU name", default=None)
    parser.add_argument("--commit-hash", "-c", help="Commit hash", default=None)
    parser.add_argument("--job", "-j", help="Optional job name required for uploading reports", default=None)
    parser.add_argument(
        "--report-repo-id", "-r", help="Optional report repository ID required for uploading reports", default=None
    )
    paths, gpu_name, commit_hash, job, report_repo_id = get_arguments(parser.parse_args())

    # Accumulators for the collated report. The `None` key counts summary
    # lines that matched no known status prefix (they are tallied but not
    # emitted as individual results).
    total_status_count = {
        "passed": 0,
        "failed": 0,
        "skipped": 0,
        "error": 0,
        None: 0,
    }
    collated_report_buffer = []

    for model_dir in sorted(paths):
        # One entry per model; report dirs are named "<model>_test_reports".
        model_name = model_dir.name.removesuffix("_test_reports")
        report = {"model": model_name, "results": []}
        results = []

        # Read pytest's short summary for this model.
        with open(model_dir / "summary_short.txt", "r") as f:
            short_summary_lines = f.readlines()

        # Parse the short summary, skipping the section-header first line.
        for line in short_summary_lines[1:]:
            status, count = parse_short_summary_line(line)
            total_status_count[status] += count
            if status:
                result = {
                    "status": status,
                    # Strip the "PASSED"/"FAILED"/... prefix, keeping the test id.
                    "test": line.split(status.upper(), maxsplit=1)[1].strip(),
                    "count": count,
                }
                results.append(result)

        # Add short summaries to report
        report["results"] = results

        collated_report_buffer.append(report)

    # Write the collated report next to the CWD, keyed by short commit hash.
    with open(f"collated_reports_{commit_hash}.json", "w") as f:
        json.dump(
            {
                "gpu_name": gpu_name,
                "commit_hash": commit_hash,
                "total_status_count": total_status_count,
                "results": collated_report_buffer,
            },
            f,
            indent=2,
        )

    # Upload only when both identifiers were supplied on the command line.
    if job and report_repo_id:
        upload_collated_report(job, report_repo_id, f"collated_reports_{commit_hash}.json")