diff --git a/.github/workflows/call-jit-perf-test.yml b/.github/workflows/call-jit-perf-test.yml
new file mode 100644
index 00000000000..5b9cd144d39
--- /dev/null
+++ b/.github/workflows/call-jit-perf-test.yml
@@ -0,0 +1,160 @@
+name: JIT Perf Test
+
+on:
+  workflow_call:
+    inputs:
+      docker_image:
+        description: 'Docker image for the build'
+        required: true
+        type: string
+
+permissions:
+  checks: write
+  packages: write
+
+env:
+  TRACY_NO_INVARIANT_CHECK: 1
+  TRACY_NO_ISA_EXTENSIONS: 1
+
+jobs:
+  run-jit-perf:
+    timeout-minutes: 120
+    name: "JIT Perf Collection"
+
+    runs-on:
+      - n150
+      - in-service
+
+    container:
+      image: ${{ inputs.docker_image }}
+      options: --device /dev/tenstorrent
+      volumes:
+        - /dev/hugepages:/dev/hugepages
+        - /dev/hugepages-1G:/dev/hugepages-1G
+        - /etc/udev/rules.d:/etc/udev/rules.d
+        - /lib/modules:/lib/modules
+        - /opt/tt_metal_infra/provisioning/provisioning_env:/opt/tt_metal_infra/provisioning/provisioning_env
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Fetch job id
+        id: fetch-job-id
+        uses: tenstorrent/tt-github-actions/.github/actions/job_id@main
+        with:
+          job_name: "JIT Perf Collection"
+
+      - name: Set reusable strings
+        id: strings
+        shell: bash
+        env:
+          JOB_ID: ${{ steps.fetch-job-id.outputs.job_id }}
+        run: |
+          echo "work-dir=$(pwd)" >> "$GITHUB_OUTPUT"
+          echo "build-output-dir=$(pwd)/build" >> "$GITHUB_OUTPUT"
+          echo "install-output-dir=$(pwd)/install" >> "$GITHUB_OUTPUT"
+          echo "perf-output-dir=$(pwd)/jit_perf_results" >> "$GITHUB_OUTPUT"
+
+      - name: Git safe dir
+        run: git config --global --add safe.directory ${{ steps.strings.outputs.work-dir }}
+
+      - name: Use install artifacts
+        uses: tenstorrent/tt-forge/.github/actions/download-artifact@main
+        with:
+          name: install-artifacts-tracy
+          path: install
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Remove existing whls files
+        shell: bash
+        run: |
+          rm -f *.whl
+
+      - name: Download ttrt whls
+        uses: actions/download-artifact@v4
+        with:
+          name: ttrt-whl-tracy
+
+      - name: Install ttrt whls
+        shell: bash
+        run: |
+          source env/activate
+          pip show ttrt && pip uninstall -y ttrt
+          pip install ttrt*.whl --upgrade
+
+      - name: Download Build Artifacts
+        uses: tenstorrent/tt-forge/.github/actions/download-artifact@main
+        with:
+          name: build-artifacts-tracy
+          path: build
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Generate system descriptor
+        shell: bash
+        run: |
+          source env/activate
+          ttrt query --save-artifacts
+
+      - name: Download and install ttmlir and ttnn-jit wheels
+        shell: bash
+        env:
+          GH_TOKEN: ${{ secrets.GH_TOKEN || github.token }}
+        run: |
+          source env/activate
+          rm -f ttmlir*.whl ttnn_jit*.whl
+
+          gh run download ${{ github.run_id }} --repo ${{ github.repository }} --name ttmlir-whl-tracy
+          gh run download ${{ github.run_id }} --repo ${{ github.repository }} --name ttnn-jit-whl-tracy
+
+          pip show ttmlir &> /dev/null && pip uninstall -y ttmlir
+          pip show ttnn-jit &> /dev/null && pip uninstall -y ttnn-jit
+          pip install ttnn_jit*.whl --find-links . --upgrade
+
+      - name: Set up tt-triage
+        shell: bash
+        run: |
+          TT_METAL_VERSION=$(grep 'set(TT_METAL_VERSION' third_party/CMakeLists.txt | sed 's/.*"\(.*\)".*/\1/')
+
+          mkdir -p tt-triage
+          curl -L "https://github.com/tenstorrent/tt-metal/archive/${TT_METAL_VERSION}.tar.gz" \
+            | tar -xz -C tt-triage --strip-components=1 \
+              tt-metal-${TT_METAL_VERSION}/scripts/ttexalens_ref.txt \
+              tt-metal-${TT_METAL_VERSION}/tools/tt-triage.py \
+              tt-metal-${TT_METAL_VERSION}/tools/triage \
+              tt-metal-${TT_METAL_VERSION}/tt_metal
+
+          echo "TT_METAL_OPERATION_TIMEOUT_SECONDS=${{ vars.TT_METAL_OPERATION_TIMEOUT_SECONDS || 300 }}" >> $GITHUB_ENV
+          echo "TT_METAL_DISPATCH_TIMEOUT_COMMAND_TO_EXECUTE=python $(pwd)/tt-triage/tools/tt-triage.py 1>&2" >> $GITHUB_ENV
+
+      - name: Set up tracy profiler tools
+        shell: bash
+        run: |
+          TRACY_BIN_DIR="${{ steps.strings.outputs.work-dir }}/third_party/tt-metal/src/tt-metal/build/tools/profiler/bin"
+          mkdir -p "$TRACY_BIN_DIR"
+          cp ${{ steps.strings.outputs.build-output-dir }}/python_packages/ttrt/runtime/capture-release "$TRACY_BIN_DIR/"
+          cp ${{ steps.strings.outputs.build-output-dir }}/python_packages/ttrt/runtime/csvexport-release "$TRACY_BIN_DIR/"
+
+      - name: Run JIT perf collection
+        shell: bash
+        env:
+          JOB_ID: ${{ steps.fetch-job-id.outputs.job_id }}
+        run: |
+          source env/activate
+          export PYTHONPATH="${{ steps.strings.outputs.install-output-dir }}/tt-metal/ttnn:${{ steps.strings.outputs.install-output-dir }}/tt-metal"
+          export LD_LIBRARY_PATH="${{ steps.strings.outputs.install-output-dir }}/lib:${TTMLIR_TOOLCHAIN_DIR}/lib:${LD_LIBRARY_PATH}"
+          export SYSTEM_DESC_PATH="${GITHUB_WORKSPACE}/ttrt-artifacts/system_desc.ttsys"
+          export TT_METAL_RUNTIME_ROOT="${{ steps.strings.outputs.install-output-dir }}/tt-metal"
+          export TT_METAL_HOME="${{ steps.strings.outputs.work-dir }}/third_party/tt-metal/src/tt-metal"
+          ln -sf ${{ steps.strings.outputs.install-output-dir }} ${{ steps.strings.outputs.build-output-dir }}
+
+          test/ttnn-jit/perf_ci/run_perf_collect.sh ${{ steps.strings.outputs.perf-output-dir }}
+
+      - name: Upload JIT perf reports
+        uses: actions/upload-artifact@v4
+        if: success() || failure()
+        with:
+          name: jit-perf-reports-${{ steps.fetch-job-id.outputs.job_id }}
+          path: ${{ steps.strings.outputs.perf-output-dir }}
+          if-no-files-found: warn
diff --git a/.github/workflows/schedule-nightly.yml b/.github/workflows/schedule-nightly.yml
index e367d7da44a..5d6af9c394c 100644
--- a/.github/workflows/schedule-nightly.yml
+++ b/.github/workflows/schedule-nightly.yml
@@ -49,6 +49,14 @@ jobs:
       test_matrix: ${{ needs.prepare-run.outputs.test_matrix }}
       timeout: ${{ fromJson(needs.prepare-run.outputs.test_timeout) }}
 
+  jit-perf-test:
+    needs: [ build-image, release-build ]
+    uses: ./.github/workflows/call-jit-perf-test.yml
+    secrets: inherit
+    with:
+      docker_image: ${{ needs.build-image.outputs.docker-image }}
+
+
   fail-notify:
     if: always()
     needs:
diff --git a/test/ttnn-jit/perf_ci/perf_tests.py b/test/ttnn-jit/perf_ci/perf_tests.py
new file mode 100644
index 00000000000..fb2e2f7ffe4
--- /dev/null
+++ b/test/ttnn-jit/perf_ci/perf_tests.py
@@ -0,0 +1,101 @@
+# SPDX-FileCopyrightText: (c) 2025 Tenstorrent AI ULC
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import ttnn
+import ttnn_jit
+import torch
+
+import pytest
+
+from op_definitions import abs, exp, add, mul, matmul
+
+# Memory configs that pass for all ops and both JIT and non-JIT.
+# DRAM interleaved works for matmul (requires interleaved) and all elementwise ops.
+# L1 interleaved is not used: JIT runtime fails with RuntimeError on L1 interleaved
+# inputs (submit path), so we only test DRAM interleaved for paired JIT vs TTNN comparison.
+
+MEMORY_CONFIGS = [
+    (ttnn.DRAM_MEMORY_CONFIG, "dram_interleaved"),
+]
+
+
+def is_unary(op):
+    return op == abs or op == exp
+
+
+@pytest.mark.parametrize(
+    "h, w",
+    [
+        (2048, 2048),
+    ],
+)
+@pytest.mark.parametrize(
+    "op",
+    [
+        abs,
+        exp,
+        add,
+        mul,
+        matmul,
+    ],
+    ids=[
+        "abs",
+        "exp",
+        "add",
+        "mul",
+        "matmul",
+    ],
+)
+@pytest.mark.parametrize(
+    "dtype, ttnn_dtype",
+    [
+        (torch.bfloat16, ttnn.DataType.BFLOAT16),
+        (torch.bfloat16, ttnn.DataType.BFLOAT8_B),
+    ],
+    ids=["bf16", "bfp8"],
+)
+@pytest.mark.parametrize(
+    "memory_config, memory_config_id",
+    MEMORY_CONFIGS,
+    ids=[id for _, id in MEMORY_CONFIGS],
+)
+@pytest.mark.parametrize(
+    "jit_enabled",
+    [
+        True,
+        False,
+    ],
+)
+def test_op_compare(
+    h, w, op, dtype, ttnn_dtype, memory_config, memory_config_id, jit_enabled
+):
+    device = ttnn.open_device(device_id=0)
+    torch_tensor_a = torch.rand((h, w), dtype=dtype) * 100
+    torch_tensor_b = torch.rand((h, w), dtype=dtype) * 100
+
+    input_a = ttnn.from_torch(
+        torch_tensor_a,
+        dtype=ttnn_dtype,
+        layout=ttnn.TILE_LAYOUT,
+        device=device,
+        memory_config=memory_config,
+    )
+    input_b = ttnn.from_torch(
+        torch_tensor_b,
+        dtype=ttnn_dtype,
+        layout=ttnn.TILE_LAYOUT,
+        device=device,
+        memory_config=memory_config,
+    )
+
+    function_to_test = (
+        ttnn_jit.jit(debug=True, enable_cache=True)(op) if jit_enabled else op
+    )
+    output_tensor = (
+        function_to_test(input_a)
+        if is_unary(op)
+        else function_to_test(input_a, input_b)
+    )
+
+    print(f"output_tensor\n: {output_tensor}")
+    ttnn.close_device(device)
diff --git a/test/ttnn-jit/perf_ci/run_perf_collect.sh b/test/ttnn-jit/perf_ci/run_perf_collect.sh
new file mode 100755
index 00000000000..b30af2bdeee
--- /dev/null
+++ b/test/ttnn-jit/perf_ci/run_perf_collect.sh
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: (c) 2025 Tenstorrent AI ULC
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Run each parametrized test in perf_tests.py under the device profiler (tracy)
+# and dump results into a directory per test. Use TT_METAL_PROFILER_DIR so each
+# run writes to a known subdir. At the end, runs summarize_perf_results.py to
+# produce one JSON report per test case in OUT_DIR (perf_<op>_<dtype>_<mem_cfg>[_<job_id>].json).
+# Set JOB_ID env var to include the job ID in filenames (required for CI).
+#
+# Usage:
+#   ./test/ttnn-jit/perf_ci/run_perf_collect.sh [OUT_DIR]
+#
+# Example:
+#   ./test/ttnn-jit/perf_ci/run_perf_collect.sh
+#   ./test/ttnn-jit/perf_ci/run_perf_collect.sh generated/jit_perf_reports/my_run
+
+set -e
+
+# Script lives in test/ttnn-jit/perf_ci/; go up three levels to repo root
+REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)"
+cd "$REPO_ROOT"
+
+# Activate venv if not already active
+if [ -z "$VIRTUAL_ENV" ] && [ -f env/activate ]; then
+  # shellcheck source=/dev/null
+  source env/activate
+fi
+
+OUT_DIR="${1:-generated/jit_perf_reports/run_$(date +%Y%m%d_%H%M%S)}"
+mkdir -p "$OUT_DIR"
+OUT_DIR="$(cd "$OUT_DIR" && pwd)"
+
+# Collect test ids from perf_tests.py (whatever is parametrized there)
+collect_out=$(mktemp)
+if ! pytest test/ttnn-jit/perf_ci/perf_tests.py --collect-only -q >"$collect_out" 2>&1; then
+  echo "Error: pytest collect failed:" >&2
+  cat "$collect_out" >&2
+  rm -f "$collect_out"
+  exit 1
+fi
+TESTS=($(sed -n 's/.*test_op_compare\[\(.*\)\]/\1/p' <"$collect_out"))
+if [ ${#TESTS[@]} -eq 0 ]; then
+  echo "Error: no test_op_compare[*] tests found in test/ttnn-jit/perf_ci/perf_tests.py. Pytest collect output:" >&2
+  cat "$collect_out" >&2
+  rm -f "$collect_out"
+  exit 1
+fi
+rm -f "$collect_out"
+echo "Collected ${#TESTS[@]} tests from perf_tests.py"
+
+export TT_METAL_DEVICE_PROFILER=1
+
+for tid in "${TESTS[@]}"; do
+  echo "=============================================="
+  echo "Running test_op_compare[$tid] ..."
+  echo "=============================================="
+  export TT_METAL_PROFILER_DIR="$OUT_DIR/$tid"
+  mkdir -p "$TT_METAL_PROFILER_DIR"
+  if ! python -m tracy -m -r -p "pytest test/ttnn-jit/perf_ci/perf_tests.py::test_op_compare[$tid]"; then
+    echo "Warning: test_op_compare[$tid] exited with non-zero status (results may still be present)."
+  fi
+done
+
+echo ""
+echo "Results written under: $OUT_DIR"
+echo "Summarizing..."
+JOB_ID_ARG=""
+if [ -n "$JOB_ID" ]; then
+  JOB_ID_ARG="--job-id $JOB_ID"
+fi
+if python test/ttnn-jit/perf_ci/summarize_perf_results.py "$OUT_DIR" --output-dir "$OUT_DIR" $JOB_ID_ARG; then
+  echo "Summary reports written to $OUT_DIR"
+else
+  echo "Warning: summarizer exited with an error (run dir may be partial)." >&2
+fi
diff --git a/test/ttnn-jit/perf_ci/summarize_perf_results.py b/test/ttnn-jit/perf_ci/summarize_perf_results.py
new file mode 100755
index 00000000000..057baa5c12b
--- /dev/null
+++ b/test/ttnn-jit/perf_ci/summarize_perf_results.py
@@ -0,0 +1,302 @@
+#!/usr/bin/env python3
+# SPDX-FileCopyrightText: (c) 2025 Tenstorrent AI ULC
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Read all ops_perf_results_*.csv under a run directory (from run_perf_collect.sh),
+# group JIT vs non-JIT by case (op, shape, dtype, memory_config_id) and write one
+# JSON report per case with structured fields for Superset ingestion.
+#
+# Each report becomes its own benchmark_run row in Superset with clean filterable
+# columns (model=op, precision=dtype, config=memory/shape/fidelity) and simple
+# measurement names (jit_kernel_duration_ns, ttnn_kernel_duration_ns, perf_ratio).
+# +# Usage: +# python test/ttnn-jit/perf_ci/summarize_perf_results.py RUN_DIR [--output-dir DIR] [--job-id ID] +# +# Example: +# python test/ttnn-jit/perf_ci/summarize_perf_results.py generated/jit_perf_reports/run_20250309_123456 +# python test/ttnn-jit/perf_ci/summarize_perf_results.py generated/jit_perf_reports/run_20250309_123456 --job-id 66822899875 + +import argparse +import csv +import json +import sys +from pathlib import Path +from typing import Any, Optional + +DEVICE_KERNEL_DURATION_COL = "DEVICE KERNEL DURATION [ns]" +MATH_FIDELITY_COL = "MATH FIDELITY" +OUTPUT_0_DATATYPE_COL = "OUTPUT_0_DATATYPE" +INPUT_0_DATATYPE_COL = "INPUT_0_DATATYPE" + +UNARY_OPS = frozenset({"abs", "exp"}) + +MEMORY_CONFIG_IDS = ("dram_interleaved", "l1_interleaved") + + +def find_result_csvs(run_dir: Path): + """Yield (test_id, csv_path) for each ops_perf_results_*.csv under run_dir.""" + run_dir = run_dir.resolve() + if not run_dir.is_dir(): + return + for test_dir in run_dir.iterdir(): + if not test_dir.is_dir(): + continue + test_id = test_dir.name + reports_dir = test_dir / "reports" + if not reports_dir.is_dir(): + continue + for ts_dir in reports_dir.iterdir(): + if not ts_dir.is_dir(): + continue + for csv_path in ts_dir.glob("ops_perf_results_*.csv"): + yield test_id, csv_path + + +def parse_test_id(test_id: str) -> Optional[dict]: + """ + Parse test_id into jit, op, h, w, and optionally memory_config_id. + Supports: 'True-abs-256-256' (4 parts), 'True-bf16-abs-256-256' (5), + 'True-dram_interleaved-bf16-abs-256-256' (6). 
+ """ + parts = test_id.split("-") + if len(parts) < 4: + return None + jit = parts[0].lower() == "true" + memory_config_id: Optional[str] = None + if len(parts) == 6 and parts[1] in MEMORY_CONFIG_IDS: + memory_config_id = parts[1] + op = parts[3] + try: + h, w = int(parts[4]), int(parts[5]) + except (ValueError, IndexError): + return None + elif len(parts) == 5: + op = parts[2] + try: + h, w = int(parts[3]), int(parts[4]) + except (ValueError, IndexError): + return None + elif len(parts) == 4: + op = parts[1] + try: + h, w = int(parts[2]), int(parts[3]) + except (ValueError, IndexError): + return None + else: + try: + h, w = int(parts[-2]), int(parts[-1]) + except (ValueError, IndexError): + return None + op = "-".join(parts[1:-2]) + return {"jit": jit, "op": op, "h": h, "w": w, "memory_config_id": memory_config_id} + + +def read_csv_duration_and_meta(csv_path: Path) -> Optional[tuple[int, str, str]]: + """ + Read CSV: sum DEVICE KERNEL DURATION [ns], and from first data row return + (duration_ns, dtype, math_fidelity). dtype/math_fidelity may be empty if + column missing. 
+ """ + total = 0 + found_duration = False + dtype = "" + math_fidelity = "" + with open(csv_path, newline="", encoding="utf-8") as f: + reader = csv.DictReader(f) + fieldnames = reader.fieldnames or [] + for row in reader: + if DEVICE_KERNEL_DURATION_COL in fieldnames: + val = row.get(DEVICE_KERNEL_DURATION_COL, "").strip() + if val and val != "-": + try: + total += int(float(val)) + found_duration = True + except (ValueError, TypeError): + pass + if not dtype and ( + OUTPUT_0_DATATYPE_COL in fieldnames + or INPUT_0_DATATYPE_COL in fieldnames + ): + dtype = ( + row.get(OUTPUT_0_DATATYPE_COL) + or row.get(INPUT_0_DATATYPE_COL) + or "" + ).strip() + if not math_fidelity and MATH_FIDELITY_COL in fieldnames: + math_fidelity = (row.get(MATH_FIDELITY_COL) or "").strip() + if not found_duration: + return None + return (total, dtype, math_fidelity) + + +def make_case_key( + op: str, h: int, w: int, dtype: str, memory_config_id: Optional[str] +) -> tuple: + """Immutable key to group JIT and non-JIT runs of the same case.""" + return (op, h, w, dtype, memory_config_id or "") + + +def _measurement(name: str, value: float, step_name: str) -> dict[str, Any]: + return { + "measurement_name": name, + "value": value, + "iteration": 1, + "step_name": step_name, + "step_warm_up_num_iterations": 0, + "target": -1, + "device_power": -1.0, + "device_temperature": -1.0, + } + + +def main(): + parser = argparse.ArgumentParser( + description="Summarize JIT perf run CSVs into one JSON report per (op, dtype, memory_config) test case for Superset." 
+ ) + parser.add_argument( + "run_dir", + type=Path, + help="Directory produced by run_perf_collect.sh (contains test_id/reports/...)", + ) + parser.add_argument( + "--output-dir", + type=Path, + default=None, + help="Directory to write individual JSON reports (default: run_dir)", + ) + parser.add_argument( + "--job-id", + type=str, + default=None, + help="GitHub job ID to append to filenames (required for CI collect_data)", + ) + parser.add_argument( + "-q", + "--quiet", + action="store_true", + help="Do not print progress", + ) + args = parser.parse_args() + + run_dir = args.run_dir.resolve() + if not run_dir.is_dir(): + print(f"Error: not a directory: {run_dir}", file=sys.stderr) + sys.exit(1) + + out_dir = (args.output_dir or run_dir).resolve() + out_dir.mkdir(parents=True, exist_ok=True) + job_suffix = f"_{args.job_id}" if args.job_id else "" + + raw: list[dict[str, Any]] = [] + for test_id, csv_path in find_result_csvs(run_dir): + parsed = parse_test_id(test_id) + if not parsed: + if not args.quiet: + print(f"Skip (bad test_id): {test_id}", file=sys.stderr) + continue + result = read_csv_duration_and_meta(csv_path) + if result is None: + if not args.quiet: + print(f"Skip (no duration): {csv_path}", file=sys.stderr) + continue + duration_ns, dtype, math_fidelity = result + raw.append( + { + "test_id": test_id, + "jit": parsed["jit"], + "op": parsed["op"], + "h": parsed["h"], + "w": parsed["w"], + "memory_config_id": parsed.get("memory_config_id"), + "duration_ns": duration_ns, + "dtype": dtype, + "math_fidelity": math_fidelity, + } + ) + if not args.quiet: + print( + f" {test_id}: {duration_ns} ns (dtype={dtype!r}, math_fidelity={math_fidelity!r})" + ) + + groups: dict[tuple, dict[str, Any]] = {} + for r in raw: + key = make_case_key( + r["op"], r["h"], r["w"], r["dtype"], r.get("memory_config_id") + ) + if key not in groups: + groups[key] = { + "op": r["op"], + "h": r["h"], + "w": r["w"], + "shape": f"{r['h']}x{r['w']}", + "dtype": r["dtype"], + 
"math_fidelity_jit": "", + "math_fidelity_ttnn": "", + "memory_config_id": r.get("memory_config_id") or "", + "jit_duration_ns": None, + "ttnn_duration_ns": None, + } + g = groups[key] + if r["jit"]: + g["jit_duration_ns"] = r["duration_ns"] + g["math_fidelity_jit"] = r["math_fidelity"] + else: + g["ttnn_duration_ns"] = r["duration_ns"] + g["math_fidelity_ttnn"] = r["math_fidelity"] + + file_count = 0 + for key in sorted(groups.keys()): + g = groups[key] + op = g["op"] + dtype = g["dtype"] + mem_cfg = g["memory_config_id"] + shape = g["shape"] + jit_ns = g["jit_duration_ns"] + ttnn_ns = g["ttnn_duration_ns"] + is_unary = op in UNARY_OPS + + measurements = [] + if jit_ns is not None: + measurements.append(_measurement("jit_kernel_duration_ns", jit_ns, op)) + if ttnn_ns is not None: + measurements.append(_measurement("ttnn_kernel_duration_ns", ttnn_ns, op)) + if jit_ns is not None and ttnn_ns is not None and jit_ns > 0: + ratio = round(ttnn_ns / jit_ns, 4) + measurements.append(_measurement("perf_ratio", ratio, op)) + + config = { + "input_a_shape": shape, + "input_b_shape": None if is_unary else shape, + "input_a_memory_config": mem_cfg, + "input_b_memory_config": None if is_unary else mem_cfg, + "math_fidelity_jit": g["math_fidelity_jit"], + "math_fidelity_ttnn": g["math_fidelity_ttnn"], + } + + report = { + "project": "tt-mlir", + "model": op, + "model_type": "jit_vs_ttnn", + "run_type": "op_benchmark", + "precision": dtype, + "config": config, + "measurements": measurements, + } + + filename = f"perf_{op}_{dtype}_{mem_cfg}{job_suffix}.json" + filepath = out_dir / filename + with open(filepath, "w", encoding="utf-8") as f: + json.dump(report, f, indent=2) + file_count += 1 + if not args.quiet: + print(f" Wrote {filepath.name} ({len(measurements)} measurements)") + + if not args.quiet: + print(f"Wrote {file_count} report(s) from {len(groups)} case(s) to {out_dir}") + return 0 + + +if __name__ == "__main__": + sys.exit(main())