From 9eac945752577427aedbaa61061b58e850a2025b Mon Sep 17 00:00:00 2001 From: Saber Gholami Date: Tue, 10 Mar 2026 15:29:04 +0000 Subject: [PATCH 01/12] initial work for perf ci --- .github/workflows/call-jit-perf-test.yml | 158 +++++++++++ .github/workflows/schedule-nightly.yml | 18 +- .../workflows/workflow-run-collect-data.yml | 2 + test/ttnn-jit/perf_ci/perf_tests.py | 101 +++++++ test/ttnn-jit/perf_ci/run_perf_collect.sh | 72 +++++ .../perf_ci/summarize_perf_results.py | 254 ++++++++++++++++++ 6 files changed, 602 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/call-jit-perf-test.yml create mode 100644 test/ttnn-jit/perf_ci/perf_tests.py create mode 100755 test/ttnn-jit/perf_ci/run_perf_collect.sh create mode 100755 test/ttnn-jit/perf_ci/summarize_perf_results.py diff --git a/.github/workflows/call-jit-perf-test.yml b/.github/workflows/call-jit-perf-test.yml new file mode 100644 index 00000000000..dc82d50abd4 --- /dev/null +++ b/.github/workflows/call-jit-perf-test.yml @@ -0,0 +1,158 @@ +name: JIT Perf Test + +on: + workflow_call: + inputs: + docker_image: + description: 'Docker image for the build' + required: true + type: string + +permissions: + checks: write + packages: write + +env: + TRACY_NO_INVARIANT_CHECK: 1 + TRACY_NO_ISA_EXTENSIONS: 1 + +jobs: + run-jit-perf: + timeout-minutes: 120 + name: "JIT Perf Collection" + + runs-on: + - n150 + - in-service + + container: + image: ${{ inputs.docker_image }} + options: --device /dev/tenstorrent + volumes: + - /dev/hugepages:/dev/hugepages + - /dev/hugepages-1G:/dev/hugepages-1G + - /etc/udev/rules.d:/etc/udev/rules.d + - /lib/modules:/lib/modules + - /opt/tt_metal_infra/provisioning/provisioning_env:/opt/tt_metal_infra/provisioning/provisioning_env + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Fetch job id + id: fetch-job-id + uses: tenstorrent/tt-github-actions/.github/actions/job_id@main + with: + job_name: "JIT Perf Collection" + + - name: Set reusable 
strings + id: strings + shell: bash + env: + JOB_ID: ${{ steps.fetch-job-id.outputs.job_id }} + run: | + echo "work-dir=$(pwd)" >> "$GITHUB_OUTPUT" + echo "build-output-dir=$(pwd)/build" >> "$GITHUB_OUTPUT" + echo "install-output-dir=$(pwd)/install" >> "$GITHUB_OUTPUT" + echo "perf-output-dir=$(pwd)/jit_perf_results" >> "$GITHUB_OUTPUT" + + - name: Git safe dir + run: git config --global --add safe.directory ${{ steps.strings.outputs.work-dir }} + + - name: Use install artifacts + uses: tenstorrent/tt-forge/.github/actions/download-artifact@main + with: + name: install-artifacts-tracy + path: install + github_token: ${{ secrets.GITHUB_TOKEN }} + + - name: Remove existing whls files + shell: bash + run: | + rm -f *.whl + + - name: Download ttrt whls + uses: actions/download-artifact@v4 + with: + name: ttrt-whl-tracy + + - name: Install ttrt whls + shell: bash + run: | + source env/activate + pip show ttrt && pip uninstall -y ttrt + pip install ttrt*.whl --upgrade + + - name: Download Build Artifacts + uses: tenstorrent/tt-forge/.github/actions/download-artifact@main + with: + name: build-artifacts-tracy + path: build + github_token: ${{ secrets.GITHUB_TOKEN }} + + - name: Generate system descriptor + shell: bash + run: | + source env/activate + ttrt query --save-artifacts + + - name: Download and install ttmlir and ttnn-jit wheels + shell: bash + env: + GH_TOKEN: ${{ secrets.GH_TOKEN || github.token }} + run: | + source env/activate + rm -f ttmlir*.whl ttnn_jit*.whl + + gh run download ${{ github.run_id }} --repo ${{ github.repository }} --name ttmlir-whl-tracy + gh run download ${{ github.run_id }} --repo ${{ github.repository }} --name ttnn-jit-whl-tracy + + pip show ttmlir &> /dev/null && pip uninstall -y ttmlir + pip show ttnn-jit &> /dev/null && pip uninstall -y ttnn-jit + pip install ttnn_jit*.whl --find-links . 
--upgrade + + - name: Set up tt-triage + shell: bash + run: | + TT_METAL_VERSION=$(grep 'set(TT_METAL_VERSION' third_party/CMakeLists.txt | sed 's/.*"\(.*\)".*/\1/') + + mkdir -p tt-triage + curl -L "https://github.com/tenstorrent/tt-metal/archive/${TT_METAL_VERSION}.tar.gz" \ + | tar -xz -C tt-triage --strip-components=1 \ + tt-metal-${TT_METAL_VERSION}/scripts/ttexalens_ref.txt \ + tt-metal-${TT_METAL_VERSION}/tools/tt-triage.py \ + tt-metal-${TT_METAL_VERSION}/tools/triage \ + tt-metal-${TT_METAL_VERSION}/tt_metal + + echo "TT_METAL_OPERATION_TIMEOUT_SECONDS=${{ vars.TT_METAL_OPERATION_TIMEOUT_SECONDS || 300 }}" >> $GITHUB_ENV + echo "TT_METAL_DISPATCH_TIMEOUT_COMMAND_TO_EXECUTE=python $(pwd)/tt-triage/tools/tt-triage.py 1>&2" >> $GITHUB_ENV + + - name: Run JIT perf collection + shell: bash + run: | + source env/activate + export PYTHONPATH="${{ steps.strings.outputs.install-output-dir }}/tt-metal/ttnn:${{ steps.strings.outputs.install-output-dir }}/tt-metal" + export LD_LIBRARY_PATH="${{ steps.strings.outputs.install-output-dir }}/lib:${TTMLIR_TOOLCHAIN_DIR}/lib:${LD_LIBRARY_PATH}" + export SYSTEM_DESC_PATH="${GITHUB_WORKSPACE}/ttrt-artifacts/system_desc.ttsys" + export TT_METAL_RUNTIME_ROOT="${{ steps.strings.outputs.install-output-dir }}/tt-metal" + export TT_METAL_HOME="${{ steps.strings.outputs.work-dir }}/third_party/tt-metal/src/tt-metal" + ln -sf ${{ steps.strings.outputs.install-output-dir }} ${{ steps.strings.outputs.build-output-dir }} + + test/ttnn-jit/perf_ci/run_perf_collect.sh ${{ steps.strings.outputs.perf-output-dir }} + + - name: Upload JIT perf summary + uses: actions/upload-artifact@v4 + if: success() || failure() + with: + name: jit-perf-summary-${{ steps.fetch-job-id.outputs.job_id }} + path: ${{ steps.strings.outputs.perf-output-dir }}/jit_perf_summary.json + if-no-files-found: warn + + - name: Upload JIT perf reports + uses: ./.github/actions/collect-and-upload-perf-reports + if: success() || failure() + with: + reports_dir: ${{ 
steps.strings.outputs.perf-output-dir }} + perf_report_path: ${{ steps.strings.outputs.work-dir }}/perf_reports + artifact_name: jit-perf-reports-${{ steps.fetch-job-id.outputs.job_id }} diff --git a/.github/workflows/schedule-nightly.yml b/.github/workflows/schedule-nightly.yml index e367d7da44a..37986f428ed 100644 --- a/.github/workflows/schedule-nightly.yml +++ b/.github/workflows/schedule-nightly.yml @@ -32,16 +32,19 @@ jobs: runner: ${{ needs.prepare-run.outputs.runner }} sh_builder: ${{ fromJson(needs.prepare-run.outputs.sh_builder) }} component_matrix: ${{ needs.prepare-run.outputs.build_matrix }} + # TODO: Re-enable wheels-build after JIT perf CI debugging is complete. wheels-build: + if: false needs: [ prepare-run, build-image, release-build ] # release-build required so ttnn-jit wheel is built uses: ./.github/workflows/call-build-wheels.yml secrets: inherit with: docker-tag: ${{ needs.build-image.outputs.docker-tag }} docker_image: ${{ needs.build-image.outputs.docker-image }} + # TODO: Re-enable test after JIT perf CI debugging is complete. test: + if: false needs: [ prepare-run, build-image, release-build ] - if: needs.prepare-run.outputs.skip_build != 'true' uses: ./.github/workflows/call-test.yml secrets: inherit with: @@ -49,8 +52,16 @@ jobs: test_matrix: ${{ needs.prepare-run.outputs.test_matrix }} timeout: ${{ fromJson(needs.prepare-run.outputs.test_timeout) }} + jit-perf-test: + needs: [ build-image, release-build ] + uses: ./.github/workflows/call-jit-perf-test.yml + secrets: inherit + with: + docker_image: ${{ needs.build-image.outputs.docker-image }} + + # TODO: Re-enable fail-notify after JIT perf CI debugging is complete. fail-notify: - if: always() + if: false needs: - prepare-run - build-image @@ -71,8 +82,9 @@ jobs: with: jobs: ${{ toJSON(needs) }} + # TODO: Re-enable fail-send-msg after JIT perf CI debugging is complete. 
fail-send-msg: - if: always() + if: false needs: - fail-notify - test diff --git a/.github/workflows/workflow-run-collect-data.yml b/.github/workflows/workflow-run-collect-data.yml index 2567ad1dd78..add2cac7cd6 100644 --- a/.github/workflows/workflow-run-collect-data.yml +++ b/.github/workflows/workflow-run-collect-data.yml @@ -26,6 +26,8 @@ jobs: run_attempt: ${{ github.event.workflow_run.run_attempt }} sftp_host: ${{ secrets.SFTP_CICD_WRITER_HOSTNAME }} sftp_user: ${{ secrets.SFTP_CICD_WRITER_USERNAME }} + sftp_perf_host: ${{ secrets.SFTP_PERF_WRITER_HOSTNAME }} + sftp_perf_user: ${{ secrets.SFTP_PERF_WRITER_USERNAME }} sftp_optest_host: ${{ secrets.SFTP_OP_TEST_WRITER_HOSTNAME }} sftp_optest_user: ${{ secrets.SFTP_OP_TEST_WRITER_USERNAME }} ssh-private-key: ${{ secrets.SFTP_CICD_WRITER_KEY }} diff --git a/test/ttnn-jit/perf_ci/perf_tests.py b/test/ttnn-jit/perf_ci/perf_tests.py new file mode 100644 index 00000000000..b157cc31035 --- /dev/null +++ b/test/ttnn-jit/perf_ci/perf_tests.py @@ -0,0 +1,101 @@ +# SPDX-FileCopyrightText: (c) 2025 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 + +import ttnn +import ttnn_jit +import torch + +import pytest + +from op_definitions import abs, exp, add, mul, matmul + +# Memory configs that pass for all ops and both JIT and non-JIT. +# DRAM interleaved works for matmul (requires interleaved) and all elementwise ops. +# L1 interleaved is not used: JIT runtime fails with RuntimeError on L1 interleaved +# inputs (submit path), so we only test DRAM interleaved for paired JIT vs TTNN comparison. 
+MEMORY_CONFIGS = [ + (ttnn.DRAM_MEMORY_CONFIG, "dram_interleaved"), +] + + +def is_unary(op): + return op == abs or op == exp + + +@pytest.mark.parametrize( + "h, w", + [ + (256, 256), + ], +) +@pytest.mark.parametrize( + "op", + [ + abs, + exp, + add, + mul, + matmul, + ], + ids=[ + "abs", + "exp", + "add", + "mul", + "matmul", + ], +) +@pytest.mark.parametrize( + "dtype, ttnn_dtype", + [ + (torch.bfloat16, ttnn.DataType.BFLOAT16), + (torch.bfloat16, ttnn.DataType.BFLOAT8_B), + ], + ids=["bf16", "bfp8"], +) +@pytest.mark.parametrize( + "memory_config, memory_config_id", + MEMORY_CONFIGS, + ids=[id for _, id in MEMORY_CONFIGS], +) +@pytest.mark.parametrize( + "jit_enabled", + [ + True, + False, + ], +) +def test_op_compare( + h, w, op, dtype, ttnn_dtype, memory_config, memory_config_id, jit_enabled +): + device = ttnn.open_device(device_id=0) + torch_tensor_a = torch.rand((h, w), dtype=dtype) * 100 + torch_tensor_b = torch.rand((h, w), dtype=dtype) * 100 + + input_a = ttnn.from_torch( + torch_tensor_a, + dtype=ttnn_dtype, + layout=ttnn.TILE_LAYOUT, + device=device, + memory_config=memory_config, + ) + input_b = ttnn.from_torch( + torch_tensor_b, + dtype=ttnn_dtype, + layout=ttnn.TILE_LAYOUT, + device=device, + memory_config=memory_config, + ) + + function_to_test = ( + ttnn_jit.jit(debug=True, enable_cache=True)(op) if jit_enabled else op + ) + output_tensor = ( + function_to_test(input_a) + if is_unary(op) + else function_to_test(input_a, input_b) + ) + + print(f"output_tensor\n: {output_tensor}") + ttnn.close_device(device) diff --git a/test/ttnn-jit/perf_ci/run_perf_collect.sh b/test/ttnn-jit/perf_ci/run_perf_collect.sh new file mode 100755 index 00000000000..c7fb07a5283 --- /dev/null +++ b/test/ttnn-jit/perf_ci/run_perf_collect.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: (c) 2025 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +# +# Run each parametrized test in perf_tests.py under the device profiler (tracy) +# and dump 
results into a directory per test. Use TT_METAL_PROFILER_DIR so each +# run writes to a known subdir. At the end, runs summarize_perf_results.py to +# produce OUT_DIR/jit_perf_summary.json. +# +# Usage: +# ./test/ttnn-jit/perf_ci/run_perf_collect.sh [OUT_DIR] +# +# Example: +# ./test/ttnn-jit/perf_ci/run_perf_collect.sh +# ./test/ttnn-jit/perf_ci/run_perf_collect.sh generated/jit_perf_reports/my_run + +set -e + +# Script lives in test/ttnn-jit/perf_ci/; go up three levels to repo root +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)" +cd "$REPO_ROOT" + +# Optional: activate venv if present +if [ -f env/activate ]; then + # shellcheck source=/dev/null + source env/activate +fi + +OUT_DIR="${1:-generated/jit_perf_reports/run_$(date +%Y%m%d_%H%M%S)}" +mkdir -p "$OUT_DIR" +OUT_DIR="$(cd "$OUT_DIR" && pwd)" + +# Collect test ids from perf_tests.py (whatever is parametrized there) +collect_out=$(mktemp) +if ! pytest test/ttnn-jit/perf_ci/perf_tests.py --collect-only -q >"$collect_out" 2>&1; then + echo "Error: pytest collect failed:" >&2 + cat "$collect_out" >&2 + rm -f "$collect_out" + exit 1 +fi +TESTS=($(sed -n 's/.*test_op_compare\[\(.*\)\]/\1/p' <"$collect_out")) +if [ ${#TESTS[@]} -eq 0 ]; then + echo "Error: no test_op_compare[*] tests found in test/ttnn-jit/perf_ci/perf_tests.py. Pytest collect output:" >&2 + cat "$collect_out" >&2 + rm -f "$collect_out" + exit 1 +fi +rm -f "$collect_out" +echo "Collected ${#TESTS[@]} tests from perf_tests.py" + +export TT_METAL_DEVICE_PROFILER=1 + +for tid in "${TESTS[@]}"; do + echo "==============================================" + echo "Running test_op_compare[$tid] ..." + echo "==============================================" + export TT_METAL_PROFILER_DIR="$OUT_DIR/$tid" + mkdir -p "$TT_METAL_PROFILER_DIR" + if ! 
python -m tracy -m -r -p "pytest test/ttnn-jit/perf_ci/perf_tests.py::test_op_compare[$tid]"; then + echo "Warning: test_op_compare[$tid] exited with non-zero status (results may still be present)." + fi +done + +echo "" +echo "Results written under: $OUT_DIR" +echo "Summarizing..." +if python test/ttnn-jit/perf_ci/summarize_perf_results.py "$OUT_DIR" -o "$OUT_DIR/jit_perf_summary.json"; then + echo "Summary written to $OUT_DIR/jit_perf_summary.json" +else + echo "Warning: summarizer exited with an error (run dir may be partial)." >&2 +fi diff --git a/test/ttnn-jit/perf_ci/summarize_perf_results.py b/test/ttnn-jit/perf_ci/summarize_perf_results.py new file mode 100755 index 00000000000..30a74d389a5 --- /dev/null +++ b/test/ttnn-jit/perf_ci/summarize_perf_results.py @@ -0,0 +1,254 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: (c) 2025 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +# +# Read all ops_perf_results_*.csv under a run directory (from run_perf_collect.sh), +# group JIT vs non-JIT by case (op, shape, dtype, memory_config_id) and write one +# entry per case with jit_duration_ns, ttnn_duration_ns, and perf_pct_ttnn. +# math_fidelity is not part of the key so JIT (e.g. HiFi4) and TTNN (e.g. HiFi2) pair. +# (100 = same, <100 = JIT slower, >100 = JIT faster). Suitable for Superset. 
+# +# Usage: +# python test/ttnn-jit/perf_ci/summarize_perf_results.py RUN_DIR [-o OUTPUT.json] +# +# Example: +# python test/ttnn-jit/perf_ci/summarize_perf_results.py generated/jit_perf_reports/run_20250309_123456 -o jit_perf_summary.json + +import argparse +import csv +import json +import sys +from pathlib import Path +from typing import Any, Optional + +DEVICE_KERNEL_DURATION_COL = "DEVICE KERNEL DURATION [ns]" +MATH_FIDELITY_COL = "MATH FIDELITY" +OUTPUT_0_DATATYPE_COL = "OUTPUT_0_DATATYPE" +INPUT_0_DATATYPE_COL = "INPUT_0_DATATYPE" + + +def find_result_csvs(run_dir: Path): + """Yield (test_id, csv_path) for each ops_perf_results_*.csv under run_dir.""" + run_dir = run_dir.resolve() + if not run_dir.is_dir(): + return + for test_dir in run_dir.iterdir(): + if not test_dir.is_dir(): + continue + test_id = test_dir.name + reports_dir = test_dir / "reports" + if not reports_dir.is_dir(): + continue + for ts_dir in reports_dir.iterdir(): + if not ts_dir.is_dir(): + continue + for csv_path in ts_dir.glob("ops_perf_results_*.csv"): + yield test_id, csv_path + + +# Known memory_config suffixes in test_id (e.g. ...-dram_interleaved). +MEMORY_CONFIG_IDS = ("dram_interleaved", "l1_interleaved") + + +def parse_test_id(test_id: str) -> Optional[dict]: + """ + Parse test_id into jit, op, h, w, and optionally memory_config_id. + Supports: 'True-abs-256-256' (4 parts), 'True-bf16-abs-256-256' (5), + 'True-dram_interleaved-bf16-abs-256-256' (6). 
+ """ + parts = test_id.split("-") + if len(parts) < 4: + return None + jit = parts[0].lower() == "true" + memory_config_id: Optional[str] = None + if len(parts) == 6 and parts[1] in MEMORY_CONFIG_IDS: + memory_config_id = parts[1] + op = parts[3] + try: + h, w = int(parts[4]), int(parts[5]) + except (ValueError, IndexError): + return None + elif len(parts) == 5: + op = parts[2] + try: + h, w = int(parts[3]), int(parts[4]) + except (ValueError, IndexError): + return None + elif len(parts) == 4: + op = parts[1] + try: + h, w = int(parts[2]), int(parts[3]) + except (ValueError, IndexError): + return None + else: + try: + h, w = int(parts[-2]), int(parts[-1]) + except (ValueError, IndexError): + return None + op = "-".join(parts[1:-2]) + return {"jit": jit, "op": op, "h": h, "w": w, "memory_config_id": memory_config_id} + + +def read_csv_duration_and_meta(csv_path: Path) -> Optional[tuple[int, str, str]]: + """ + Read CSV: sum DEVICE KERNEL DURATION [ns], and from first data row return + (duration_ns, dtype, math_fidelity). dtype/math_fidelity may be empty if + column missing. 
+ """ + total = 0 + found_duration = False + dtype = "" + math_fidelity = "" + with open(csv_path, newline="", encoding="utf-8") as f: + reader = csv.DictReader(f) + fieldnames = reader.fieldnames or [] + for row in reader: + if DEVICE_KERNEL_DURATION_COL in fieldnames: + val = row.get(DEVICE_KERNEL_DURATION_COL, "").strip() + if val and val != "-": + try: + total += int(float(val)) + found_duration = True + except (ValueError, TypeError): + pass + if not dtype and ( + OUTPUT_0_DATATYPE_COL in fieldnames + or INPUT_0_DATATYPE_COL in fieldnames + ): + dtype = ( + row.get(OUTPUT_0_DATATYPE_COL) + or row.get(INPUT_0_DATATYPE_COL) + or "" + ).strip() + if not math_fidelity and MATH_FIDELITY_COL in fieldnames: + math_fidelity = (row.get(MATH_FIDELITY_COL) or "").strip() + if not found_duration: + return None + return (total, dtype, math_fidelity) + + +def make_case_key( + op: str, h: int, w: int, dtype: str, memory_config_id: Optional[str] +) -> tuple: + """Immutable key to group JIT and non-JIT runs of the same case. Excludes math_fidelity so JIT and TTNN runs (which may report different fidelities) pair into one entry.""" + return (op, h, w, dtype, memory_config_id or "") + + +def main(): + parser = argparse.ArgumentParser( + description="Summarize JIT perf run CSVs into one entry per (op, shape, dtype, memory_config) with JIT vs TTNN comparison." 
+ ) + parser.add_argument( + "run_dir", + type=Path, + help="Directory produced by run_perf_collect.sh (contains test_id/reports/...)", + ) + parser.add_argument( + "-o", + "--output", + type=Path, + default=None, + help="Output JSON path (default: RUN_DIR/jit_perf_summary.json)", + ) + parser.add_argument( + "-q", + "--quiet", + action="store_true", + help="Do not print progress", + ) + args = parser.parse_args() + + run_dir = args.run_dir.resolve() + if not run_dir.is_dir(): + print(f"Error: not a directory: {run_dir}", file=sys.stderr) + sys.exit(1) + + out_path = args.output or (run_dir / "jit_perf_summary.json") + + # Raw rows: one per CSV (test_id, jit, op, h, w, duration_ns, dtype, math_fidelity) + raw: list[dict[str, Any]] = [] + for test_id, csv_path in find_result_csvs(run_dir): + parsed = parse_test_id(test_id) + if not parsed: + if not args.quiet: + print(f"Skip (bad test_id): {test_id}", file=sys.stderr) + continue + result = read_csv_duration_and_meta(csv_path) + if result is None: + if not args.quiet: + print(f"Skip (no duration): {csv_path}", file=sys.stderr) + continue + duration_ns, dtype, math_fidelity = result + raw.append( + { + "test_id": test_id, + "jit": parsed["jit"], + "op": parsed["op"], + "h": parsed["h"], + "w": parsed["w"], + "memory_config_id": parsed.get("memory_config_id"), + "duration_ns": duration_ns, + "dtype": dtype, + "math_fidelity": math_fidelity, + "csv_path": str(csv_path), + } + ) + if not args.quiet: + print( + f" {test_id}: {duration_ns} ns (dtype={dtype!r}, math_fidelity={math_fidelity!r})" + ) + + # Group by case key (op, h, w, dtype, memory_config_id) so JIT and TTNN pair even when math_fidelity differs (e.g. 
matmul HiFi4 vs HiFi2) + groups: dict[tuple, dict[str, Any]] = {} + for r in raw: + key = make_case_key( + r["op"], r["h"], r["w"], r["dtype"], r.get("memory_config_id") + ) + if key not in groups: + groups[key] = { + "op": r["op"], + "h": r["h"], + "w": r["w"], + "shape": f"{r['h']}x{r['w']}", + "dtype": r["dtype"], + "math_fidelity": r["math_fidelity"], + "math_fidelity_ttnn": None, + "memory_config_id": r.get("memory_config_id") or "", + "jit_duration_ns": None, + "ttnn_duration_ns": None, + "perf_pct_ttnn": None, + "jit_csv_path": None, + "ttnn_csv_path": None, + } + g = groups[key] + if r["jit"]: + g["jit_duration_ns"] = r["duration_ns"] + g["jit_csv_path"] = r["csv_path"] + g["math_fidelity"] = r["math_fidelity"] + else: + g["ttnn_duration_ns"] = r["duration_ns"] + g["ttnn_csv_path"] = r["csv_path"] + g["math_fidelity_ttnn"] = r["math_fidelity"] + + # Compute perf_pct_ttnn: (ttnn_duration / jit_duration) * 100 → 100 = same, <100 = JIT slower, >100 = JIT faster + out_rows: list[dict[str, Any]] = [] + for key in sorted(groups.keys()): + g = groups[key] + jit_ns = g["jit_duration_ns"] + ttnn_ns = g["ttnn_duration_ns"] + if jit_ns is not None and ttnn_ns is not None and jit_ns > 0: + g["perf_pct_ttnn"] = round((ttnn_ns / jit_ns) * 100.0, 2) + out_rows.append(g) + + out_path.parent.mkdir(parents=True, exist_ok=True) + with open(out_path, "w", encoding="utf-8") as f: + json.dump(out_rows, f, indent=2) + + if not args.quiet: + print(f"Wrote {len(out_rows)} case(s) to {out_path}") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From 1da63077d00a0eeaecbea8a6c211f796a4a98f7c Mon Sep 17 00:00:00 2001 From: Saber Gholami Date: Tue, 10 Mar 2026 16:06:56 +0000 Subject: [PATCH 02/12] python path issue --- test/ttnn-jit/perf_ci/run_perf_collect.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/ttnn-jit/perf_ci/run_perf_collect.sh b/test/ttnn-jit/perf_ci/run_perf_collect.sh index c7fb07a5283..647f1397f24 100755 --- 
a/test/ttnn-jit/perf_ci/run_perf_collect.sh +++ b/test/ttnn-jit/perf_ci/run_perf_collect.sh @@ -21,8 +21,8 @@ set -e REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)" cd "$REPO_ROOT" -# Optional: activate venv if present -if [ -f env/activate ]; then +# Activate venv if not already active +if [ -z "$VIRTUAL_ENV" ] && [ -f env/activate ]; then # shellcheck source=/dev/null source env/activate fi From 81f47dfeb2d7e8c5adea30aa7f915d6a1b92f576 Mon Sep 17 00:00:00 2001 From: Saber Gholami Date: Tue, 10 Mar 2026 16:36:13 +0000 Subject: [PATCH 03/12] fix ttnn not found --- .github/workflows/call-jit-perf-test.yml | 3 ++- test/ttnn-jit/perf_ci/run_perf_collect.sh | 10 ++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/.github/workflows/call-jit-perf-test.yml b/.github/workflows/call-jit-perf-test.yml index dc82d50abd4..ed35711565a 100644 --- a/.github/workflows/call-jit-perf-test.yml +++ b/.github/workflows/call-jit-perf-test.yml @@ -130,9 +130,10 @@ jobs: - name: Run JIT perf collection shell: bash + env: + INSTALL_DIR: ${{ steps.strings.outputs.install-output-dir }} run: | source env/activate - export PYTHONPATH="${{ steps.strings.outputs.install-output-dir }}/tt-metal/ttnn:${{ steps.strings.outputs.install-output-dir }}/tt-metal" export LD_LIBRARY_PATH="${{ steps.strings.outputs.install-output-dir }}/lib:${TTMLIR_TOOLCHAIN_DIR}/lib:${LD_LIBRARY_PATH}" export SYSTEM_DESC_PATH="${GITHUB_WORKSPACE}/ttrt-artifacts/system_desc.ttsys" export TT_METAL_RUNTIME_ROOT="${{ steps.strings.outputs.install-output-dir }}/tt-metal" diff --git a/test/ttnn-jit/perf_ci/run_perf_collect.sh b/test/ttnn-jit/perf_ci/run_perf_collect.sh index 647f1397f24..5f74aa9c07e 100755 --- a/test/ttnn-jit/perf_ci/run_perf_collect.sh +++ b/test/ttnn-jit/perf_ci/run_perf_collect.sh @@ -21,12 +21,18 @@ set -e REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." 
&& pwd)" cd "$REPO_ROOT" -# Activate venv if not already active -if [ -z "$VIRTUAL_ENV" ] && [ -f env/activate ]; then +# Activate venv if present +if [ -f env/activate ]; then # shellcheck source=/dev/null source env/activate fi +# In CI, INSTALL_DIR points to extracted install artifacts containing tt-metal. +# Set PYTHONPATH so pytest can find the ttnn module (same as ttnn_jit.sh). +if [ -n "$INSTALL_DIR" ]; then + export PYTHONPATH="$INSTALL_DIR/tt-metal/ttnn:$INSTALL_DIR/tt-metal" +fi + OUT_DIR="${1:-generated/jit_perf_reports/run_$(date +%Y%m%d_%H%M%S)}" mkdir -p "$OUT_DIR" OUT_DIR="$(cd "$OUT_DIR" && pwd)" From 82d7c9806297cfb2c0bde3384409e0dc19911e0c Mon Sep 17 00:00:00 2001 From: Saber Gholami Date: Tue, 10 Mar 2026 16:39:06 +0000 Subject: [PATCH 04/12] Revert "fix ttnn not found" This reverts commit 81f47dfeb2d7e8c5adea30aa7f915d6a1b92f576. --- .github/workflows/call-jit-perf-test.yml | 3 +-- test/ttnn-jit/perf_ci/run_perf_collect.sh | 10 ++-------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/.github/workflows/call-jit-perf-test.yml b/.github/workflows/call-jit-perf-test.yml index ed35711565a..dc82d50abd4 100644 --- a/.github/workflows/call-jit-perf-test.yml +++ b/.github/workflows/call-jit-perf-test.yml @@ -130,10 +130,9 @@ jobs: - name: Run JIT perf collection shell: bash - env: - INSTALL_DIR: ${{ steps.strings.outputs.install-output-dir }} run: | source env/activate + export PYTHONPATH="${{ steps.strings.outputs.install-output-dir }}/tt-metal/ttnn:${{ steps.strings.outputs.install-output-dir }}/tt-metal" export LD_LIBRARY_PATH="${{ steps.strings.outputs.install-output-dir }}/lib:${TTMLIR_TOOLCHAIN_DIR}/lib:${LD_LIBRARY_PATH}" export SYSTEM_DESC_PATH="${GITHUB_WORKSPACE}/ttrt-artifacts/system_desc.ttsys" export TT_METAL_RUNTIME_ROOT="${{ steps.strings.outputs.install-output-dir }}/tt-metal" diff --git a/test/ttnn-jit/perf_ci/run_perf_collect.sh b/test/ttnn-jit/perf_ci/run_perf_collect.sh index 5f74aa9c07e..647f1397f24 100755 --- 
a/test/ttnn-jit/perf_ci/run_perf_collect.sh +++ b/test/ttnn-jit/perf_ci/run_perf_collect.sh @@ -21,18 +21,12 @@ set -e REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)" cd "$REPO_ROOT" -# Activate venv if present -if [ -f env/activate ]; then +# Activate venv if not already active +if [ -z "$VIRTUAL_ENV" ] && [ -f env/activate ]; then # shellcheck source=/dev/null source env/activate fi -# In CI, INSTALL_DIR points to extracted install artifacts containing tt-metal. -# Set PYTHONPATH so pytest can find the ttnn module (same as ttnn_jit.sh). -if [ -n "$INSTALL_DIR" ]; then - export PYTHONPATH="$INSTALL_DIR/tt-metal/ttnn:$INSTALL_DIR/tt-metal" -fi - OUT_DIR="${1:-generated/jit_perf_reports/run_$(date +%Y%m%d_%H%M%S)}" mkdir -p "$OUT_DIR" OUT_DIR="$(cd "$OUT_DIR" && pwd)" From f93edfed5fdcf134a8aeee9e749ca4c28dd9b3e6 Mon Sep 17 00:00:00 2001 From: Saber Gholami Date: Tue, 10 Mar 2026 16:46:23 +0000 Subject: [PATCH 05/12] tracy fix --- .github/workflows/call-jit-perf-test.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/call-jit-perf-test.yml b/.github/workflows/call-jit-perf-test.yml index dc82d50abd4..836d0a66e72 100644 --- a/.github/workflows/call-jit-perf-test.yml +++ b/.github/workflows/call-jit-perf-test.yml @@ -128,6 +128,14 @@ jobs: echo "TT_METAL_OPERATION_TIMEOUT_SECONDS=${{ vars.TT_METAL_OPERATION_TIMEOUT_SECONDS || 300 }}" >> $GITHUB_ENV echo "TT_METAL_DISPATCH_TIMEOUT_COMMAND_TO_EXECUTE=python $(pwd)/tt-triage/tools/tt-triage.py 1>&2" >> $GITHUB_ENV + - name: Set up tracy profiler tools + shell: bash + run: | + TRACY_BIN_DIR="${{ steps.strings.outputs.work-dir }}/third_party/tt-metal/src/tt-metal/build/tools/profiler/bin" + mkdir -p "$TRACY_BIN_DIR" + cp ${{ steps.strings.outputs.build-output-dir }}/python_packages/ttrt/runtime/capture-release "$TRACY_BIN_DIR/" + cp ${{ steps.strings.outputs.build-output-dir }}/python_packages/ttrt/runtime/csvexport-release "$TRACY_BIN_DIR/" + - name: Run JIT perf collection 
shell: bash run: | From 802d5a9278cbac67a2a6284bf15712e45db654fd Mon Sep 17 00:00:00 2001 From: Saber Gholami Date: Tue, 10 Mar 2026 18:48:25 +0000 Subject: [PATCH 06/12] fix upload issue --- .github/workflows/call-jit-perf-test.yml | 8 ++++---- test/ttnn-jit/perf_ci/perf_tests.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/call-jit-perf-test.yml b/.github/workflows/call-jit-perf-test.yml index 836d0a66e72..09d4fcc43b2 100644 --- a/.github/workflows/call-jit-perf-test.yml +++ b/.github/workflows/call-jit-perf-test.yml @@ -158,9 +158,9 @@ jobs: if-no-files-found: warn - name: Upload JIT perf reports - uses: ./.github/actions/collect-and-upload-perf-reports + uses: actions/upload-artifact@v4 if: success() || failure() with: - reports_dir: ${{ steps.strings.outputs.perf-output-dir }} - perf_report_path: ${{ steps.strings.outputs.work-dir }}/perf_reports - artifact_name: jit-perf-reports-${{ steps.fetch-job-id.outputs.job_id }} + name: jit-perf-reports-${{ steps.fetch-job-id.outputs.job_id }} + path: ${{ steps.strings.outputs.perf-output-dir }} + if-no-files-found: warn diff --git a/test/ttnn-jit/perf_ci/perf_tests.py b/test/ttnn-jit/perf_ci/perf_tests.py index b157cc31035..fb2e2f7ffe4 100644 --- a/test/ttnn-jit/perf_ci/perf_tests.py +++ b/test/ttnn-jit/perf_ci/perf_tests.py @@ -26,7 +26,7 @@ def is_unary(op): @pytest.mark.parametrize( "h, w", [ - (256, 256), + (2048, 2048), ], ) @pytest.mark.parametrize( From aa8a7653202f8b9c4828960ce83def10466b64ca Mon Sep 17 00:00:00 2001 From: Saber Gholami Date: Wed, 11 Mar 2026 15:24:17 +0000 Subject: [PATCH 07/12] fix json file naming --- .github/workflows/call-jit-perf-test.yml | 4 +++- test/ttnn-jit/perf_ci/run_perf_collect.sh | 8 +++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/call-jit-perf-test.yml b/.github/workflows/call-jit-perf-test.yml index 09d4fcc43b2..cb34ec6c185 100644 --- a/.github/workflows/call-jit-perf-test.yml +++ 
b/.github/workflows/call-jit-perf-test.yml @@ -138,6 +138,8 @@ jobs: - name: Run JIT perf collection shell: bash + env: + JOB_ID: ${{ steps.fetch-job-id.outputs.job_id }} run: | source env/activate export PYTHONPATH="${{ steps.strings.outputs.install-output-dir }}/tt-metal/ttnn:${{ steps.strings.outputs.install-output-dir }}/tt-metal" @@ -154,7 +156,7 @@ jobs: if: success() || failure() with: name: jit-perf-summary-${{ steps.fetch-job-id.outputs.job_id }} - path: ${{ steps.strings.outputs.perf-output-dir }}/jit_perf_summary.json + path: ${{ steps.strings.outputs.perf-output-dir }}/perf_jit_summary_${{ steps.fetch-job-id.outputs.job_id }}.json if-no-files-found: warn - name: Upload JIT perf reports diff --git a/test/ttnn-jit/perf_ci/run_perf_collect.sh b/test/ttnn-jit/perf_ci/run_perf_collect.sh index 647f1397f24..4a7a0118e45 100755 --- a/test/ttnn-jit/perf_ci/run_perf_collect.sh +++ b/test/ttnn-jit/perf_ci/run_perf_collect.sh @@ -6,7 +6,8 @@ # Run each parametrized test in perf_tests.py under the device profiler (tracy) # and dump results into a directory per test. Use TT_METAL_PROFILER_DIR so each # run writes to a known subdir. At the end, runs summarize_perf_results.py to -# produce OUT_DIR/jit_perf_summary.json. +# produce OUT_DIR/perf_jit_summary[_JOB_ID].json. +# Set JOB_ID env var to include the job ID in the filename (required for CI). # # Usage: # ./test/ttnn-jit/perf_ci/run_perf_collect.sh [OUT_DIR] @@ -64,9 +65,10 @@ done echo "" echo "Results written under: $OUT_DIR" +SUMMARY_FILE="$OUT_DIR/perf_jit_summary${JOB_ID:+_$JOB_ID}.json" echo "Summarizing..." -if python test/ttnn-jit/perf_ci/summarize_perf_results.py "$OUT_DIR" -o "$OUT_DIR/jit_perf_summary.json"; then - echo "Summary written to $OUT_DIR/jit_perf_summary.json" +if python test/ttnn-jit/perf_ci/summarize_perf_results.py "$OUT_DIR" -o "$SUMMARY_FILE"; then + echo "Summary written to $SUMMARY_FILE" else echo "Warning: summarizer exited with an error (run dir may be partial)." 
>&2 fi From 6d640805c087f206870a001c4368a82442d665ab Mon Sep 17 00:00:00 2001 From: Saber Gholami Date: Thu, 12 Mar 2026 14:07:24 +0000 Subject: [PATCH 08/12] new json format --- .../perf_ci/summarize_perf_results.py | 37 ++++++++++++++++--- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/test/ttnn-jit/perf_ci/summarize_perf_results.py b/test/ttnn-jit/perf_ci/summarize_perf_results.py index 30a74d389a5..a478412462d 100755 --- a/test/ttnn-jit/perf_ci/summarize_perf_results.py +++ b/test/ttnn-jit/perf_ci/summarize_perf_results.py @@ -231,22 +231,49 @@ def main(): g["ttnn_csv_path"] = r["csv_path"] g["math_fidelity_ttnn"] = r["math_fidelity"] - # Compute perf_pct_ttnn: (ttnn_duration / jit_duration) * 100 → 100 = same, <100 = JIT slower, >100 = JIT faster - out_rows: list[dict[str, Any]] = [] + # Compute perf_pct_ttnn: (ttnn_duration / jit_duration) * 100 + # 100 = same, <100 = JIT slower, >100 = JIT faster + measurements: list[dict[str, Any]] = [] for key in sorted(groups.keys()): g = groups[key] jit_ns = g["jit_duration_ns"] ttnn_ns = g["ttnn_duration_ns"] if jit_ns is not None and ttnn_ns is not None and jit_ns > 0: g["perf_pct_ttnn"] = round((ttnn_ns / jit_ns) * 100.0, 2) - out_rows.append(g) + + prefix = f"{g['op']}_{g['dtype']}_{g['memory_config_id']}" + if jit_ns is not None: + measurements.append( + {"measurement_name": f"{prefix}_jit_duration_ns", "value": jit_ns} + ) + if ttnn_ns is not None: + measurements.append( + {"measurement_name": f"{prefix}_ttnn_duration_ns", "value": ttnn_ns} + ) + if g["perf_pct_ttnn"] is not None: + measurements.append( + { + "measurement_name": f"{prefix}_perf_pct_ttnn", + "value": g["perf_pct_ttnn"], + } + ) + + report = { + "project": "tt-mlir", + "model": "ttnn_jit_perf", + "model_type": "jit_vs_ttnn", + "run_type": "benchmark", + "measurements": measurements, + } out_path.parent.mkdir(parents=True, exist_ok=True) with open(out_path, "w", encoding="utf-8") as f: - json.dump(out_rows, f, indent=2) + 
json.dump(report, f, indent=2) if not args.quiet: - print(f"Wrote {len(out_rows)} case(s) to {out_path}") + print( + f"Wrote {len(measurements)} measurement(s) from {len(groups)} case(s) to {out_path}" + ) return 0 From d563c3a9b20b510c7dd4f7d57679046761c34171 Mon Sep 17 00:00:00 2001 From: Saber Gholami Date: Thu, 12 Mar 2026 15:44:32 +0000 Subject: [PATCH 09/12] json format --- .../perf_ci/summarize_perf_results.py | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/test/ttnn-jit/perf_ci/summarize_perf_results.py b/test/ttnn-jit/perf_ci/summarize_perf_results.py index a478412462d..c2eaa1f60ae 100755 --- a/test/ttnn-jit/perf_ci/summarize_perf_results.py +++ b/test/ttnn-jit/perf_ci/summarize_perf_results.py @@ -135,6 +135,19 @@ def make_case_key( return (op, h, w, dtype, memory_config_id or "") +def _measurement(name: str, value: float, step_name: str) -> dict[str, Any]: + return { + "measurement_name": name, + "value": value, + "iteration": 1, + "step_name": step_name, + "step_warm_up_num_iterations": 0, + "target": -1, + "device_power": -1.0, + "device_temperature": -1.0, + } + + def main(): parser = argparse.ArgumentParser( description="Summarize JIT perf run CSVs into one entry per (op, shape, dtype, memory_config) with JIT vs TTNN comparison." 
@@ -242,20 +255,16 @@ def main(): g["perf_pct_ttnn"] = round((ttnn_ns / jit_ns) * 100.0, 2) prefix = f"{g['op']}_{g['dtype']}_{g['memory_config_id']}" + step = f"{g['op']}_{g['shape']}_{g['dtype']}" if jit_ns is not None: - measurements.append( - {"measurement_name": f"{prefix}_jit_duration_ns", "value": jit_ns} - ) + measurements.append(_measurement(f"{prefix}_jit_duration_ns", jit_ns, step)) if ttnn_ns is not None: measurements.append( - {"measurement_name": f"{prefix}_ttnn_duration_ns", "value": ttnn_ns} + _measurement(f"{prefix}_ttnn_duration_ns", ttnn_ns, step) ) if g["perf_pct_ttnn"] is not None: measurements.append( - { - "measurement_name": f"{prefix}_perf_pct_ttnn", - "value": g["perf_pct_ttnn"], - } + _measurement(f"{prefix}_perf_pct_ttnn", g["perf_pct_ttnn"], step) ) report = { From 99fb6c07aba871616eebf8e4b5e2642a6f97b631 Mon Sep 17 00:00:00 2001 From: Saber Gholami Date: Fri, 13 Mar 2026 16:29:24 +0000 Subject: [PATCH 10/12] new json format --- .github/workflows/call-jit-perf-test.yml | 8 -- test/ttnn-jit/perf_ci/run_perf_collect.sh | 13 +- .../perf_ci/summarize_perf_results.py | 120 ++++++++++-------- 3 files changed, 74 insertions(+), 67 deletions(-) diff --git a/.github/workflows/call-jit-perf-test.yml b/.github/workflows/call-jit-perf-test.yml index cb34ec6c185..5b9cd144d39 100644 --- a/.github/workflows/call-jit-perf-test.yml +++ b/.github/workflows/call-jit-perf-test.yml @@ -151,14 +151,6 @@ jobs: test/ttnn-jit/perf_ci/run_perf_collect.sh ${{ steps.strings.outputs.perf-output-dir }} - - name: Upload JIT perf summary - uses: actions/upload-artifact@v4 - if: success() || failure() - with: - name: jit-perf-summary-${{ steps.fetch-job-id.outputs.job_id }} - path: ${{ steps.strings.outputs.perf-output-dir }}/perf_jit_summary_${{ steps.fetch-job-id.outputs.job_id }}.json - if-no-files-found: warn - - name: Upload JIT perf reports uses: actions/upload-artifact@v4 if: success() || failure() diff --git a/test/ttnn-jit/perf_ci/run_perf_collect.sh 
b/test/ttnn-jit/perf_ci/run_perf_collect.sh index 4a7a0118e45..b30af2bdeee 100755 --- a/test/ttnn-jit/perf_ci/run_perf_collect.sh +++ b/test/ttnn-jit/perf_ci/run_perf_collect.sh @@ -6,8 +6,8 @@ # Run each parametrized test in perf_tests.py under the device profiler (tracy) # and dump results into a directory per test. Use TT_METAL_PROFILER_DIR so each # run writes to a known subdir. At the end, runs summarize_perf_results.py to -# produce OUT_DIR/perf_jit_summary[_JOB_ID].json. -# Set JOB_ID env var to include the job ID in the filename (required for CI). +# produce one JSON report per test case in OUT_DIR (perf_OP_DTYPE_MEMCFG[_JOB_ID].json). +# Set JOB_ID env var to include the job ID in filenames (required for CI). # # Usage: # ./test/ttnn-jit/perf_ci/run_perf_collect.sh [OUT_DIR] @@ -65,10 +65,13 @@ done echo "" echo "Results written under: $OUT_DIR" -SUMMARY_FILE="$OUT_DIR/perf_jit_summary${JOB_ID:+_$JOB_ID}.json" echo "Summarizing..." -if python test/ttnn-jit/perf_ci/summarize_perf_results.py "$OUT_DIR" -o "$SUMMARY_FILE"; then - echo "Summary written to $SUMMARY_FILE" +JOB_ID_ARG="" +if [ -n "$JOB_ID" ]; then + JOB_ID_ARG="--job-id $JOB_ID" +fi +if python test/ttnn-jit/perf_ci/summarize_perf_results.py "$OUT_DIR" --output-dir "$OUT_DIR" $JOB_ID_ARG; then + echo "Summary reports written to $OUT_DIR" else echo "Warning: summarizer exited with an error (run dir may be partial)." >&2 fi diff --git a/test/ttnn-jit/perf_ci/summarize_perf_results.py b/test/ttnn-jit/perf_ci/summarize_perf_results.py index c2eaa1f60ae..057baa5c12b 100755 --- a/test/ttnn-jit/perf_ci/summarize_perf_results.py +++ b/test/ttnn-jit/perf_ci/summarize_perf_results.py @@ -5,15 +5,18 @@ # # Read all ops_perf_results_*.csv under a run directory (from run_perf_collect.sh), # group JIT vs non-JIT by case (op, shape, dtype, memory_config_id) and write one -# entry per case with jit_duration_ns, ttnn_duration_ns, and perf_pct_ttnn. -# math_fidelity is not part of the key so JIT (e.g. HiFi4) and TTNN (e.g. HiFi2) pair.
-# (100 = same, <100 = JIT slower, >100 = JIT faster). Suitable for Superset. +# JSON report per case with structured fields for Superset ingestion. +# +# Each report becomes its own benchmark_run row in Superset with clean filterable +# columns (model=op, precision=dtype, config=memory/shape/fidelity) and simple +# measurement names (jit_kernel_duration_ns, ttnn_kernel_duration_ns, perf_ratio). # # Usage: -# python test/ttnn-jit/perf_ci/summarize_perf_results.py RUN_DIR [-o OUTPUT.json] +# python test/ttnn-jit/perf_ci/summarize_perf_results.py RUN_DIR [--output-dir DIR] [--job-id ID] # # Example: -# python test/ttnn-jit/perf_ci/summarize_perf_results.py generated/jit_perf_reports/run_20250309_123456 -o jit_perf_summary.json +# python test/ttnn-jit/perf_ci/summarize_perf_results.py generated/jit_perf_reports/run_20250309_123456 +# python test/ttnn-jit/perf_ci/summarize_perf_results.py generated/jit_perf_reports/run_20250309_123456 --job-id 66822899875 import argparse import csv @@ -27,6 +30,10 @@ OUTPUT_0_DATATYPE_COL = "OUTPUT_0_DATATYPE" INPUT_0_DATATYPE_COL = "INPUT_0_DATATYPE" +UNARY_OPS = frozenset({"abs", "exp"}) + +MEMORY_CONFIG_IDS = ("dram_interleaved", "l1_interleaved") + def find_result_csvs(run_dir: Path): """Yield (test_id, csv_path) for each ops_perf_results_*.csv under run_dir.""" @@ -47,10 +54,6 @@ def find_result_csvs(run_dir: Path): yield test_id, csv_path -# Known memory_config suffixes in test_id (e.g. ...-dram_interleaved). -MEMORY_CONFIG_IDS = ("dram_interleaved", "l1_interleaved") - - def parse_test_id(test_id: str) -> Optional[dict]: """ Parse test_id into jit, op, h, w, and optionally memory_config_id. @@ -131,7 +134,7 @@ def read_csv_duration_and_meta(csv_path: Path) -> Optional[tuple[int, str, str]] def make_case_key( op: str, h: int, w: int, dtype: str, memory_config_id: Optional[str] ) -> tuple: - """Immutable key to group JIT and non-JIT runs of the same case. 
Excludes math_fidelity so JIT and TTNN runs (which may report different fidelities) pair into one entry.""" + """Immutable key to group JIT and non-JIT runs of the same case.""" return (op, h, w, dtype, memory_config_id or "") @@ -150,7 +153,7 @@ def _measurement(name: str, value: float, step_name: str) -> dict[str, Any]: def main(): parser = argparse.ArgumentParser( - description="Summarize JIT perf run CSVs into one entry per (op, shape, dtype, memory_config) with JIT vs TTNN comparison." + description="Summarize JIT perf run CSVs into one JSON report per (op, dtype, memory_config) test case for Superset." ) parser.add_argument( "run_dir", @@ -158,11 +161,16 @@ def main(): help="Directory produced by run_perf_collect.sh (contains test_id/reports/...)", ) parser.add_argument( - "-o", - "--output", + "--output-dir", type=Path, default=None, - help="Output JSON path (default: RUN_DIR/jit_perf_summary.json)", + help="Directory to write individual JSON reports (default: run_dir)", + ) + parser.add_argument( + "--job-id", + type=str, + default=None, + help="GitHub job ID to append to filenames (required for CI collect_data)", ) parser.add_argument( "-q", @@ -177,9 +185,10 @@ def main(): print(f"Error: not a directory: {run_dir}", file=sys.stderr) sys.exit(1) - out_path = args.output or (run_dir / "jit_perf_summary.json") + out_dir = (args.output_dir or run_dir).resolve() + out_dir.mkdir(parents=True, exist_ok=True) + job_suffix = f"_{args.job_id}" if args.job_id else "" - # Raw rows: one per CSV (test_id, jit, op, h, w, duration_ns, dtype, math_fidelity) raw: list[dict[str, Any]] = [] for test_id, csv_path in find_result_csvs(run_dir): parsed = parse_test_id(test_id) @@ -204,7 +213,6 @@ def main(): "duration_ns": duration_ns, "dtype": dtype, "math_fidelity": math_fidelity, - "csv_path": str(csv_path), } ) if not args.quiet: @@ -212,7 +220,6 @@ def main(): f" {test_id}: {duration_ns} ns (dtype={dtype!r}, math_fidelity={math_fidelity!r})" ) - # Group by case key (op, h, 
w, dtype, memory_config_id) so JIT and TTNN pair even when math_fidelity differs (e.g. matmul HiFi4 vs HiFi2) groups: dict[tuple, dict[str, Any]] = {} for r in raw: key = make_case_key( @@ -225,64 +232,69 @@ def main(): "w": r["w"], "shape": f"{r['h']}x{r['w']}", "dtype": r["dtype"], - "math_fidelity": r["math_fidelity"], - "math_fidelity_ttnn": None, + "math_fidelity_jit": "", + "math_fidelity_ttnn": "", "memory_config_id": r.get("memory_config_id") or "", "jit_duration_ns": None, "ttnn_duration_ns": None, - "perf_pct_ttnn": None, - "jit_csv_path": None, - "ttnn_csv_path": None, } g = groups[key] if r["jit"]: g["jit_duration_ns"] = r["duration_ns"] - g["jit_csv_path"] = r["csv_path"] - g["math_fidelity"] = r["math_fidelity"] + g["math_fidelity_jit"] = r["math_fidelity"] else: g["ttnn_duration_ns"] = r["duration_ns"] - g["ttnn_csv_path"] = r["csv_path"] g["math_fidelity_ttnn"] = r["math_fidelity"] - # Compute perf_pct_ttnn: (ttnn_duration / jit_duration) * 100 - # 100 = same, <100 = JIT slower, >100 = JIT faster - measurements: list[dict[str, Any]] = [] + file_count = 0 for key in sorted(groups.keys()): g = groups[key] + op = g["op"] + dtype = g["dtype"] + mem_cfg = g["memory_config_id"] + shape = g["shape"] jit_ns = g["jit_duration_ns"] ttnn_ns = g["ttnn_duration_ns"] - if jit_ns is not None and ttnn_ns is not None and jit_ns > 0: - g["perf_pct_ttnn"] = round((ttnn_ns / jit_ns) * 100.0, 2) + is_unary = op in UNARY_OPS - prefix = f"{g['op']}_{g['dtype']}_{g['memory_config_id']}" - step = f"{g['op']}_{g['shape']}_{g['dtype']}" + measurements = [] if jit_ns is not None: - measurements.append(_measurement(f"{prefix}_jit_duration_ns", jit_ns, step)) + measurements.append(_measurement("jit_kernel_duration_ns", jit_ns, op)) if ttnn_ns is not None: - measurements.append( - _measurement(f"{prefix}_ttnn_duration_ns", ttnn_ns, step) - ) - if g["perf_pct_ttnn"] is not None: - measurements.append( - _measurement(f"{prefix}_perf_pct_ttnn", g["perf_pct_ttnn"], step) - ) + 
measurements.append(_measurement("ttnn_kernel_duration_ns", ttnn_ns, op)) + if jit_ns is not None and ttnn_ns is not None and jit_ns > 0: + ratio = round(ttnn_ns / jit_ns, 4) + measurements.append(_measurement("perf_ratio", ratio, op)) - report = { - "project": "tt-mlir", - "model": "ttnn_jit_perf", - "model_type": "jit_vs_ttnn", - "run_type": "benchmark", - "measurements": measurements, - } + config = { + "input_a_shape": shape, + "input_b_shape": None if is_unary else shape, + "input_a_memory_config": mem_cfg, + "input_b_memory_config": None if is_unary else mem_cfg, + "math_fidelity_jit": g["math_fidelity_jit"], + "math_fidelity_ttnn": g["math_fidelity_ttnn"], + } + + report = { + "project": "tt-mlir", + "model": op, + "model_type": "jit_vs_ttnn", + "run_type": "op_benchmark", + "precision": dtype, + "config": config, + "measurements": measurements, + } - out_path.parent.mkdir(parents=True, exist_ok=True) - with open(out_path, "w", encoding="utf-8") as f: - json.dump(report, f, indent=2) + filename = f"perf_{op}_{dtype}_{mem_cfg}{job_suffix}.json" + filepath = out_dir / filename + with open(filepath, "w", encoding="utf-8") as f: + json.dump(report, f, indent=2) + file_count += 1 + if not args.quiet: + print(f" Wrote {filepath.name} ({len(measurements)} measurements)") if not args.quiet: - print( - f"Wrote {len(measurements)} measurement(s) from {len(groups)} case(s) to {out_path}" - ) + print(f"Wrote {file_count} report(s) from {len(groups)} case(s) to {out_dir}") return 0 From bb2f4e587a0bc274ad41822c66dd83ed65ee54e8 Mon Sep 17 00:00:00 2001 From: Saber Gholami Date: Fri, 13 Mar 2026 20:19:20 +0000 Subject: [PATCH 11/12] enable other nightly workflows --- .github/workflows/schedule-nightly.yml | 9 +++------ .github/workflows/workflow-run-collect-data.yml | 2 -- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/.github/workflows/schedule-nightly.yml b/.github/workflows/schedule-nightly.yml index 37986f428ed..cd50ae5833f 100644 --- 
a/.github/workflows/schedule-nightly.yml +++ b/.github/workflows/schedule-nightly.yml @@ -32,18 +32,16 @@ jobs: runner: ${{ needs.prepare-run.outputs.runner }} sh_builder: ${{ fromJson(needs.prepare-run.outputs.sh_builder) }} component_matrix: ${{ needs.prepare-run.outputs.build_matrix }} - # TODO: Re-enable wheels-build after JIT perf CI debugging is complete. wheels-build: - if: false needs: [ prepare-run, build-image, release-build ] # release-build required so ttnn-jit wheel is built + if: needs.prepare-run.outputs.skip_build != 'true' uses: ./.github/workflows/call-build-wheels.yml secrets: inherit with: docker-tag: ${{ needs.build-image.outputs.docker-tag }} docker_image: ${{ needs.build-image.outputs.docker-image }} - # TODO: Re-enable test after JIT perf CI debugging is complete. test: - if: false + if: always() needs: [ prepare-run, build-image, release-build ] uses: ./.github/workflows/call-test.yml secrets: inherit @@ -59,9 +57,8 @@ jobs: with: docker_image: ${{ needs.build-image.outputs.docker-image }} - # TODO: Re-enable fail-notify after JIT perf CI debugging is complete. 
fail-notify: - if: false + if: always() needs: - prepare-run - build-image diff --git a/.github/workflows/workflow-run-collect-data.yml b/.github/workflows/workflow-run-collect-data.yml index add2cac7cd6..2567ad1dd78 100644 --- a/.github/workflows/workflow-run-collect-data.yml +++ b/.github/workflows/workflow-run-collect-data.yml @@ -26,8 +26,6 @@ jobs: run_attempt: ${{ github.event.workflow_run.run_attempt }} sftp_host: ${{ secrets.SFTP_CICD_WRITER_HOSTNAME }} sftp_user: ${{ secrets.SFTP_CICD_WRITER_USERNAME }} - sftp_perf_host: ${{ secrets.SFTP_PERF_WRITER_HOSTNAME }} - sftp_perf_user: ${{ secrets.SFTP_PERF_WRITER_USERNAME }} sftp_optest_host: ${{ secrets.SFTP_OP_TEST_WRITER_HOSTNAME }} sftp_optest_user: ${{ secrets.SFTP_OP_TEST_WRITER_USERNAME }} ssh-private-key: ${{ secrets.SFTP_CICD_WRITER_KEY }} From 081e865d77f727245e491a4ffccc22ff911cb18f Mon Sep 17 00:00:00 2001 From: Saber Gholami Date: Fri, 13 Mar 2026 20:20:50 +0000 Subject: [PATCH 12/12] nightly fix --- .github/workflows/schedule-nightly.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/schedule-nightly.yml b/.github/workflows/schedule-nightly.yml index cd50ae5833f..5d6af9c394c 100644 --- a/.github/workflows/schedule-nightly.yml +++ b/.github/workflows/schedule-nightly.yml @@ -34,15 +34,14 @@ jobs: component_matrix: ${{ needs.prepare-run.outputs.build_matrix }} wheels-build: needs: [ prepare-run, build-image, release-build ] # release-build required so ttnn-jit wheel is built - if: needs.prepare-run.outputs.skip_build != 'true' uses: ./.github/workflows/call-build-wheels.yml secrets: inherit with: docker-tag: ${{ needs.build-image.outputs.docker-tag }} docker_image: ${{ needs.build-image.outputs.docker-image }} test: - if: always() needs: [ prepare-run, build-image, release-build ] + if: needs.prepare-run.outputs.skip_build != 'true' uses: ./.github/workflows/call-test.yml secrets: inherit with: @@ -57,6 +56,7 @@ jobs: with: docker_image: ${{ 
needs.build-image.outputs.docker-image }} + fail-notify: if: always() needs: @@ -79,9 +79,8 @@ jobs: with: jobs: ${{ toJSON(needs) }} - # TODO: Re-enable fail-send-msg after JIT perf CI debugging is complete. fail-send-msg: - if: false + if: always() needs: - fail-notify - test