.github/workflows/collated-reports.yml (new file, 49 additions)
@@ -0,0 +1,49 @@
name: CI collated reports

on:
  workflow_call:
    inputs:
      job:
        required: true
        type: string
      report_repo_id:
        required: true
        type: string
      machine_type:
        required: true
        type: string
      gpu_name:
        description: Name of the GPU used for the job. It's enough that the value contains the name of the GPU, e.g. "noise-h100-more-noise". Case insensitive.
        required: true
        type: string

jobs:
  collated_reports:
    name: Collated reports
    runs-on: ubuntu-22.04
Review comment (Collaborator): this is running on ubuntu-22.04

    if: always()
    steps:
      - uses: actions/checkout@v4
      - uses: actions/download-artifact@v4

      - name: Collated reports
        shell: bash
        env:
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          CI_SHA: ${{ github.sha }}
          TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
        run: |
          pip install huggingface_hub
          python3 utils/collated_reports.py \
            --path /transformers/reports/ \
            --machine-type ${{ inputs.machine_type }} \
            --commit-hash ${{ env.CI_SHA }} \
            --job ${{ inputs.job }} \
            --report-repo-id ${{ inputs.report_repo_id }} \
            --gpu-name ${{ inputs.gpu_name }}

      - name: Upload collated reports
        uses: actions/upload-artifact@v4
        with:
          name: collated_reports_${{ env.CI_SHA }}.json
          path: collated_reports_${{ env.CI_SHA }}.json
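
For context, a minimal sketch of how a caller workflow might invoke this reusable workflow via workflow_call (the job name, repo ID, and input values below are illustrative placeholders, not taken from this PR; `secrets: inherit` is one way to make the secrets above available to the called workflow):

    collated_reports:
      uses: ./.github/workflows/collated-reports.yml
      with:
        job: run_models_gpu
        report_repo_id: your-org/ci-reports
        machine_type: single-gpu
        gpu_name: noise-h100-more-noise
      secrets: inherit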
utils/collated_reports.py (new file, 219 additions)
@@ -0,0 +1,219 @@
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import json
import subprocess
from dataclasses import dataclass
from pathlib import Path


DEFAULT_GPU_NAMES = ["mi300", "mi325", "mi355", "h100", "a10"]


def simplify_gpu_name(gpu_name: str, simplified_names: list[str]) -> str:
    matches = []
    for simplified_name in simplified_names:
        if simplified_name in gpu_name:
            matches.append(simplified_name)
    if len(matches) == 1:
        return matches[0]
    return gpu_name
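

# Illustrative examples (not part of the PR): simplify_gpu_name("nvidia_h100_80gb_hbm3",
# DEFAULT_GPU_NAMES) returns "h100"; a name matching zero or several entries is
# returned unchanged.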


def parse_short_summary_line(line: str) -> tuple[str | None, int]:
    if line.startswith("PASSED"):
        return "passed", 1
    if line.startswith("FAILED"):
        return "failed", 1
    if line.startswith("SKIPPED"):
        line = line.split("[", maxsplit=1)[1]
        line = line.split("]", maxsplit=1)[0]
        return "skipped", int(line)
    if line.startswith("ERROR"):
        return "error", 1
    return None, 0
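

# Illustrative behavior on pytest short-summary lines (examples invented for clarity):
#   parse_short_summary_line("PASSED tests/test_foo.py::test_bar") -> ("passed", 1)
#   parse_short_summary_line("SKIPPED [4] tests/test_foo.py:12: requires CUDA") -> ("skipped", 4)
# Unrecognized lines yield (None, 0) and are effectively ignored by the tally.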


def validate_path(p: str) -> Path:
    # Ensure the reports path exists and is a directory
    path = Path(p)
    assert path.is_dir(), f"Path {path} is not a directory"
    return path


def get_gpu_name(gpu_name: str | None) -> str:
    # Get GPU name if available
    if gpu_name is None:
        try:
            import torch

            gpu_name = torch.cuda.get_device_name()
Review comment (Collaborator): this would not get the GPU name when called from .github/workflows/collated-reports.yml, since that job runs on ubuntu

        except Exception as e:
            print(f"Failed to get GPU name with {e}")
            gpu_name = "unknown"
    else:
        gpu_name = gpu_name.replace(" ", "_").lower()
        gpu_name = simplify_gpu_name(gpu_name, DEFAULT_GPU_NAMES)

    return gpu_name
Review comment (Collaborator): why not make the argument mandatory?

Reply (Member, author): It's just to make it easier to debug manually on the machine :)



def get_commit_hash(commit_hash: str | None) -> str:
    # Get commit hash if available
    if commit_hash is None:
        try:
            commit_hash = subprocess.check_output(["git", "rev-parse", "HEAD"]).decode("utf-8").strip()
        except Exception as e:
            print(f"Failed to get commit hash with {e}")
            commit_hash = "unknown"

    return commit_hash[:7]
Review comment (Collaborator): might be easier to make the argument mandatory?

Reply (Member, author): Same as above. It's just to make debugging easier when you're logged into the machine.
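
For illustration (not part of the PR): with those fallbacks, a manual run on a GPU machine can omit --gpu-name and --commit-hash and let the script infer them via torch and git, e.g.:

    python3 utils/collated_reports.py --path /transformers/reports/ --machine-type single-gpu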



@dataclass
class Args:
    path: Path
    machine_type: str
    gpu_name: str
    commit_hash: str
    job: str | None
    report_repo_id: str | None


def get_arguments(args: argparse.Namespace) -> Args:
    path = validate_path(args.path)
    machine_type = args.machine_type
    gpu_name = get_gpu_name(args.gpu_name)
    commit_hash = get_commit_hash(args.commit_hash)
    job = args.job
    report_repo_id = args.report_repo_id
    return Args(path, machine_type, gpu_name, commit_hash, job, report_repo_id)


def upload_collated_report(job: str, report_repo_id: str, filename: str):
    # Alternatively we can check for the existence of the collated_reports file and upload in notification_service.py
    import os

    from get_previous_daily_ci import get_last_daily_ci_run
    from huggingface_hub import HfApi

    api = HfApi()

    # if it is not a scheduled run, upload the reports to a subfolder under `report_repo_folder`
    report_repo_subfolder = ""
    if os.getenv("GITHUB_EVENT_NAME") != "schedule":
        report_repo_subfolder = f"{os.getenv('GITHUB_RUN_NUMBER')}-{os.getenv('GITHUB_RUN_ID')}"
        report_repo_subfolder = f"runs/{report_repo_subfolder}"

    workflow_run = get_last_daily_ci_run(
        token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_run_id=os.getenv("GITHUB_RUN_ID")
    )
    workflow_run_created_time = workflow_run["created_at"]
    report_repo_folder = workflow_run_created_time.split("T")[0]

    if report_repo_subfolder:
        report_repo_folder = f"{report_repo_folder}/{report_repo_subfolder}"

    api.upload_file(
        path_or_fileobj=f"ci_results_{job}/{filename}",
        path_in_repo=f"{report_repo_folder}/ci_results_{job}/{filename}",
        repo_id=report_repo_id,
        repo_type="dataset",
        token=os.getenv("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN"),
    )
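

# Illustrative resulting layout in the dataset repo (dates, run numbers, and the job name
# "run_models_gpu" are made up): a scheduled run uploads to
#   2025-08-20/ci_results_run_models_gpu/collated_reports_abc1234.json
# while a non-scheduled run lands under
#   2025-08-20/runs/42-1234567890/ci_results_run_models_gpu/collated_reports_abc1234.json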


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Post-process model test reports.")
    parser.add_argument("--path", "-p", help="Path to the reports folder")
    parser.add_argument(
        "--machine-type", "-m", help="Process single or multi GPU results", choices=["single-gpu", "multi-gpu"]
    )
    parser.add_argument("--gpu-name", "-g", help="GPU name", default=None)
    parser.add_argument("--commit-hash", "-c", help="Commit hash", default=None)
    parser.add_argument("--job", "-j", help="Optional job name, required for uploading reports", default=None)
    parser.add_argument(
        "--report-repo-id", "-r", help="Optional report repository ID, required for uploading reports", default=None
    )
    args = get_arguments(parser.parse_args())

    # Initialize accumulators for collated report
    total_status_count = {
        "passed": 0,
        "failed": 0,
        "skipped": 0,
        "error": 0,
        None: 0,
    }
    collated_report_buffer = []

    path = args.path
    machine_type = args.machine_type
    gpu_name = args.gpu_name
    commit_hash = args.commit_hash
    job = args.job
    report_repo_id = args.report_repo_id

    # Find the origin directory based on machine type
    origin = path
    for p in path.iterdir():
        if machine_type in p.name:
            origin = p
            break

    # Loop through model directories and create collated reports
    for model_dir in sorted(origin.iterdir()):
        # Create a new entry for the model
        model_name = model_dir.name.removesuffix("_test_reports")
        report = {"model": model_name, "results": []}
        results = []

        # Read short summary
        with open(model_dir / "summary_short.txt", "r") as f:
            short_summary_lines = f.readlines()

        # Parse short summary
        for line in short_summary_lines[1:]:
            status, count = parse_short_summary_line(line)
            total_status_count[status] += count
            if status:
                result = {
                    "status": status,
                    "test": line.split(status.upper(), maxsplit=1)[1].strip(),
                    "count": count,
                }
                results.append(result)

        # Add short summaries to report
        report["results"] = results

        collated_report_buffer.append(report)

    # Write collated report
    with open(f"collated_reports_{commit_hash}.json", "w") as f:
        json.dump(
            {
                "gpu_name": gpu_name,
                "machine_type": machine_type,
                "commit_hash": commit_hash,
                "total_status_count": total_status_count,
                "results": collated_report_buffer,
            },
            f,
            indent=2,
        )

    if job and report_repo_id:
        upload_collated_report(job, report_repo_id, f"collated_reports_{commit_hash}.json")
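
For illustration, the emitted collated_reports_<sha7>.json has roughly this shape (all values made up; note that json.dump serializes the None status key as "null"):

    {
      "gpu_name": "h100",
      "machine_type": "single-gpu",
      "commit_hash": "abc1234",
      "total_status_count": {"passed": 120, "failed": 2, "skipped": 14, "error": 0, "null": 0},
      "results": [
        {
          "model": "bert",
          "results": [
            {"status": "failed", "test": "tests/models/bert/test_modeling_bert.py::BertModelTest::test_forward", "count": 1}
          ]
        }
      ]
    }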