Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 54 additions & 2 deletions .github/workflows/ci_e2e_llm_model_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ name: CI - E2E Model Tests

on:
workflow_dispatch:
pull_request:
push:
branches:
- main
schedule:
- cron: "0 12 * * *"

Expand Down Expand Up @@ -107,11 +111,59 @@ jobs:
output_artifacts/output_*/consolidated_benchmark.json
output_artifacts/output_*/*.log
output_artifacts/version.txt

- name: Run the Python script
if: always()
run: python sharktank/tests/e2e/automate_pr_for_gold.py

- name: Cleanup output artifacts
if: always()
run: |
rm -rf output_artifacts
test ! -d output_artifacts && echo "Output artifacts are removed"
rm -rf output_artifacts
test ! -d output_artifacts && echo "Output artifacts are removed"

- name: Check for changes
id: changes_check
if: always()
run: |
if git diff --exit-code sharktank/tests/e2e/configs/models.json > /dev/null; then
echo "No changes detected."
echo "CHANGED_JSON=false" >> $GITHUB_OUTPUT
exit 0
else
echo "Changes detected."
echo "CHANGED_JSON=true" >> $GITHUB_OUTPUT
fi

- uses: actions/create-github-app-token@df432ceedc7162793a195dd1713ff69aefc7379e # v2.0.6
# if: ${{ env.CREATE_PULL_REQUEST_TOKEN_APP_ID != '' && env.CREATE_PULL_REQUEST_TOKEN_APP_PRIVATE_KEY != '' }}
if: always()
id: generate-token
with:
app-id: ${{ secrets.CREATE_PULL_REQUEST_TOKEN_APP_ID }}
private-key: ${{ secrets.CREATE_PULL_REQUEST_TOKEN_APP_PRIVATE_KEY }}
env:
CREATE_PULL_REQUEST_TOKEN_APP_ID: ${{ secrets.CREATE_PULL_REQUEST_TOKEN_APP_ID }}
CREATE_PULL_REQUEST_TOKEN_APP_PRIVATE_KEY: ${{ secrets.CREATE_PULL_REQUEST_TOKEN_APP_PRIVATE_KEY }}

- name: Create Pull Request
id: cpr
# if: env.CHANGED_JSON == 'true'
if: always()
uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # v7.0.8
with:
token: ${{ steps.generate-token.outputs.token || secrets.GITHUB_TOKEN }}
commit-message: Update gold values in sharktank/tests/e2e/configs/models.json
title: '[Auto-Update] Update gold values for E2E tests'
body: |
This PR updates the gold values for models in sharktank/tests/e2e/configs/models.json
branch: update-gold-values
delete-branch: true
author: shark-pr-automator[bot] <41898282+github-actions[bot]@users.noreply.github.com>
signoff: true
add-paths: |
sharktank/tests/e2e/configs/models.json
base: main

# New job to push logs to shark-ai-reports repository
push_logs:
Expand Down
107 changes: 107 additions & 0 deletions sharktank/tests/e2e/automate_pr_for_gold.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# Copyright 2025 Advanced Micro Devices, Inc
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

import json
import os


def normalize_ascii(obj):
    """Recursively replace en/em dashes with ASCII hyphens inside strings.

    Walks nested dicts and lists; strings have "–" (en dash) and "—"
    (em dash) rewritten to "-". Any other type is returned unchanged.
    """
    if isinstance(obj, dict):
        return {key: normalize_ascii(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [normalize_ascii(item) for item in obj]
    if isinstance(obj, str):
        # Only these two dash variants are normalized; all other
        # non-ASCII characters pass through untouched.
        return obj.replace("–", "-").replace("—", "-")
    return obj


def parse_log(log_file_path):
    """Extract gold/current prefill and decode timings from a benchmark log.

    Scans for lines containing "GOLD PREFILL_TIME" / "GOLD DECODE_TIME".
    On each matching line, colon-separated fields 3 and 4 hold the gold
    and current values respectively; the first space-separated token of
    each field is parsed as a float. If a marker appears on several lines,
    the last occurrence wins.

    Returns:
        Tuple (gold_prefill, current_prefill, gold_decode, current_decode),
        or None when any of the four values was never found.
    """
    # marker -> [gold, current]
    timings = {
        "GOLD PREFILL_TIME": [None, None],
        "GOLD DECODE_TIME": [None, None],
    }

    def _field(text, index):
        # Mirror the original extraction exactly: colon field `index`,
        # stripped, first space-separated token, parsed as float.
        return float(text.split(":")[index].strip().split(" ")[0].replace(" ", ""))

    with open(log_file_path, "r") as handle:
        for raw_line in handle:
            for marker, slot in timings.items():
                if marker in raw_line:
                    slot[0] = _field(raw_line, 3)
                    slot[1] = _field(raw_line, 4)

    result = (
        timings["GOLD PREFILL_TIME"][0],
        timings["GOLD PREFILL_TIME"][1],
        timings["GOLD DECODE_TIME"][0],
        timings["GOLD DECODE_TIME"][1],
    )
    return None if None in result else result


def update_json_for_conditions(json_file_path, log_path):
    """Refresh mi325x gold timing values from the latest benchmark logs.

    For each model in the JSON config, reads its e2e log under
    ``log_path/output_<model>/e2e_testing_log_file.log`` and, when the
    measured current time beats the stored gold by more than the tolerance
    (3% for prefill, 6% for decode), lowers the stored gold to the measured
    value. The JSON file is rewritten (ASCII-normalized, indent=2, trailing
    newline) only when at least one value changed.

    Args:
        json_file_path: Path of the models.json config to read and rewrite.
        log_path: Directory containing the per-model ``output_<model>`` dirs.
    """
    with open(json_file_path, "r") as f:
        data = json.load(f)

    updated = False
    for model, details in data.items():
        log_file_path = os.path.join(
            log_path, f"output_{model}/e2e_testing_log_file.log"
        )

        if not os.path.exists(log_file_path):
            print(f"Skipping {model} — log file not found.")
            continue

        parsed = parse_log(log_file_path)
        if parsed is None:
            print(f"Skipping {model} — GOLD PREFILL/DECODE values missing in log.")
            continue

        gold_prefill, current_prefill, gold_decode, current_decode = parsed

        # Bug fix: the original called float(details.get(..., None)), which
        # raised TypeError and aborted the whole run whenever a model entry
        # had no mi325x gold keys. Skip such models instead.
        prefill_raw = details.get("prefill_gold_mi325x")
        decode_raw = details.get("decode_gold_mi325x")
        if prefill_raw is None or decode_raw is None:
            print(f"Skipping {model} — mi325x gold entries missing in JSON.")
            continue

        gold_prefill_mi325x = float(prefill_raw)
        gold_decode_mi325x = float(decode_raw)
        if gold_prefill_mi325x and gold_decode_mi325x:
            # Prefill gold is only lowered when the new time is >3% faster.
            if current_prefill < gold_prefill_mi325x * (1 - 0.03):
                print(
                    f"Updating PREFILL gold for {model}: {gold_prefill_mi325x} -> {current_prefill}"
                )
                details["prefill_gold_mi325x"] = round(current_prefill, 3)
                updated = True
            # Decode gold uses a looser 6% tolerance.
            if current_decode < gold_decode_mi325x * (1 - 0.06):
                print(
                    f"Updating DECODE gold for {model}: {gold_decode_mi325x} -> {current_decode}"
                )
                details["decode_gold_mi325x"] = round(current_decode, 3)
                updated = True

    if updated:
        with open(json_file_path, "w") as f:
            json.dump(normalize_ascii(data), f, indent=2, ensure_ascii=False)
            f.write("\n")
        print(
            "[IMPROVEMENT SEEN] Gold values updated in the JSON file. Creating a Pr.."
        )
    else:
        print("No updates made — all models within tolerance.")


if __name__ == "__main__":
    # Paths are relative to the repository root, matching the CI checkout
    # layout used by the e2e workflow.
    models_json_path = "sharktank/tests/e2e/configs/models.json"
    artifacts_dir = "output_artifacts"
    update_json_for_conditions(models_json_path, artifacts_dir)
32 changes: 16 additions & 16 deletions sharktank/tests/e2e/configs/models.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@
],
"output_dir": "../shark-ai/output_artifacts/",
"prefill_gold_mi325x": 175.0,
"decode_gold_mi325x": 8.50,
"prefill_gold_mi300x": 200.20,
"decode_gold_mi300x": 9.60,
"decode_gold_mi325x": 8.5,
"prefill_gold_mi300x": 200.2,
"decode_gold_mi300x": 9.6,
"extra_compile_flags_list": [
"--iree-dispatch-creation-propagate-collapse-across-expands=true",
"--iree-hip-specialize-dispatches",
Expand Down Expand Up @@ -105,7 +105,7 @@
"prefill_gold_mi325x": 263.0,
"decode_gold_mi325x": 8.23,
"prefill_gold_mi300x": 320.0,
"decode_gold_mi300x": 9.90,
"decode_gold_mi300x": 9.9,
"extra_compile_flags_list": [
"--iree-dispatch-creation-propagate-collapse-across-expands=true",
"--iree-hip-specialize-dispatches",
Expand Down Expand Up @@ -166,8 +166,8 @@
"extra_export_flags_list": [],
"output_dir": "../shark-ai/output_artifacts/",
"prefill_gold_mi325x": 1790.079,
"decode_gold_mi325x": 49.90,
"prefill_gold_mi300x": 2237.90,
"decode_gold_mi325x": 49.9,
"prefill_gold_mi300x": 2237.9,
"decode_gold_mi300x": 57.0,
"extra_compile_flags_list": [
"--iree-dispatch-creation-propagate-collapse-across-expands=true",
Expand Down Expand Up @@ -235,10 +235,10 @@
"--kv-cache-dtype=float8_e4m3fnuz"
],
"output_dir": "../shark-ai/output_artifacts/",
"prefill_gold_mi325x": 1530.90,
"decode_gold_mi325x": 43.20,
"prefill_gold_mi325x": 1530.9,
"decode_gold_mi325x": 43.2,
"prefill_gold_mi300x": 1579.0,
"decode_gold_mi300x": 48.20,
"decode_gold_mi300x": 48.2,
"extra_compile_flags_list": [
"--iree-dispatch-creation-propagate-collapse-across-expands=true",
"--iree-hip-specialize-dispatches",
Expand Down Expand Up @@ -306,7 +306,7 @@
"output_dir": "../shark-ai/output_artifacts/",
"prefill_gold_mi325x": 143.0,
"decode_gold_mi325x": 16.7,
"prefill_gold_mi300x": 160.50,
"prefill_gold_mi300x": 160.5,
"decode_gold_mi300x": 16.853,
"extra_compile_flags_list": [
"--iree-dispatch-creation-propagate-collapse-across-expands=true",
Expand Down Expand Up @@ -462,7 +462,7 @@
1
],
"extra_benchmark_flags_list": [
"-benchmark_min_warmup_time=10.0"
"--benchmark_min_warmup_time=10.0"
],
"isl": 19988,
"prefill_bs_for_time_check": 4,
Expand Down Expand Up @@ -538,7 +538,7 @@
1
],
"extra_benchmark_flags_list": [
"-benchmark_min_warmup_time=10.0"
"--benchmark_min_warmup_time=10.0"
],
"isl": 19988,
"prefill_bs_for_time_check": 4,
Expand Down Expand Up @@ -632,7 +632,7 @@
7
],
"extra_benchmark_flags_list": [
"-benchmark_min_warmup_time=10.0"
"--benchmark_min_warmup_time=10.0"
],
"isl": 19988,
"prefill_bs_for_time_check": 4,
Expand Down Expand Up @@ -726,7 +726,7 @@
7
],
"extra_benchmark_flags_list": [
"-benchmark_min_warmup_time=10.0"
"--benchmark_min_warmup_time=10.0"
],
"isl": 19988,
"prefill_bs_for_time_check": 4,
Expand Down Expand Up @@ -801,7 +801,7 @@
0
],
"extra_benchmark_flags_list": [
"-benchmark_min_warmup_time=10.0"
"--benchmark_min_warmup_time=10.0"
],
"isl": 2500,
"prefill_bs_for_time_check": 4,
Expand Down Expand Up @@ -875,7 +875,7 @@
0
],
"extra_benchmark_flags_list": [
"-benchmark_min_warmup_time=10.0"
"--benchmark_min_warmup_time=10.0"
],
"isl": 2500,
"prefill_bs_for_time_check": 4,
Expand Down
Loading