Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 160 additions & 0 deletions .github/workflows/call-jit-perf-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
# Reusable workflow: runs the ttnn-jit perf collection suite on Tenstorrent
# n150 hardware under the tracy device profiler. It builds nothing itself —
# it consumes the build/install/wheel artifacts produced by the caller.
name: JIT Perf Test

on:
  workflow_call:
    inputs:
      docker_image:
        description: 'Docker image for the build'
        required: true
        type: string

permissions:
  checks: write
  packages: write

env:
  # Tracy stability knobs: skip the timer-invariant check and ISA-extension
  # probing, which can abort the profiler on some CI hosts.
  # Quoted so YAML tooling never retypes the values.
  TRACY_NO_INVARIANT_CHECK: "1"
  TRACY_NO_ISA_EXTENSIONS: "1"

jobs:
  run-jit-perf:
    timeout-minutes: 120
    name: "JIT Perf Collection"

    # Self-hosted runner labels: a single-chip n150 board marked in-service.
    runs-on:
      - n150
      - in-service

    container:
      image: ${{ inputs.docker_image }}
      # Expose the Tenstorrent device plus the hugepages/udev/kernel-module
      # mounts the tt-metal runtime needs inside the container.
      options: --device /dev/tenstorrent
      volumes:
        - /dev/hugepages:/dev/hugepages
        - /dev/hugepages-1G:/dev/hugepages-1G
        - /etc/udev/rules.d:/etc/udev/rules.d
        - /lib/modules:/lib/modules
        - /opt/tt_metal_infra/provisioning/provisioning_env:/opt/tt_metal_infra/provisioning/provisioning_env

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Fetch job id
        id: fetch-job-id
        uses: tenstorrent/tt-github-actions/.github/actions/job_id@main
        with:
          # Must match this job's `name` exactly for the lookup to succeed.
          job_name: "JIT Perf Collection"

      - name: Set reusable strings
        id: strings
        shell: bash
        env:
          JOB_ID: ${{ steps.fetch-job-id.outputs.job_id }}
        run: |
          echo "work-dir=$(pwd)" >> "$GITHUB_OUTPUT"
          echo "build-output-dir=$(pwd)/build" >> "$GITHUB_OUTPUT"
          echo "install-output-dir=$(pwd)/install" >> "$GITHUB_OUTPUT"
          echo "perf-output-dir=$(pwd)/jit_perf_results" >> "$GITHUB_OUTPUT"

      - name: Git safe dir
        run: git config --global --add safe.directory ${{ steps.strings.outputs.work-dir }}

      - name: Use install artifacts
        uses: tenstorrent/tt-forge/.github/actions/download-artifact@main
        with:
          name: install-artifacts-tracy
          path: install
          github_token: ${{ secrets.GITHUB_TOKEN }}

      - name: Remove existing whls files
        shell: bash
        run: |
          rm -f *.whl

      - name: Download ttrt whls
        uses: actions/download-artifact@v4
        with:
          name: ttrt-whl-tracy

      - name: Install ttrt whls
        shell: bash
        run: |
          source env/activate
          # Uninstall any stale copy first. The `|| true` keeps the step from
          # failing on a fresh environment: GitHub runs bash with -e, so a
          # non-zero `pip show` (package absent) as the last command would
          # otherwise fail the whole step.
          { pip show ttrt && pip uninstall -y ttrt; } || true
          pip install ttrt*.whl --upgrade

      - name: Download Build Artifacts
        uses: tenstorrent/tt-forge/.github/actions/download-artifact@main
        with:
          name: build-artifacts-tracy
          path: build
          github_token: ${{ secrets.GITHUB_TOKEN }}

      - name: Generate system descriptor
        shell: bash
        run: |
          source env/activate
          # Writes ttrt-artifacts/system_desc.ttsys, consumed later via
          # SYSTEM_DESC_PATH in the perf-collection step.
          ttrt query --save-artifacts

      - name: Download and install ttmlir and ttnn-jit wheels
        shell: bash
        env:
          GH_TOKEN: ${{ secrets.GH_TOKEN || github.token }}
        run: |
          source env/activate
          rm -f ttmlir*.whl ttnn_jit*.whl

          gh run download ${{ github.run_id }} --repo ${{ github.repository }} --name ttmlir-whl-tracy
          gh run download ${{ github.run_id }} --repo ${{ github.repository }} --name ttnn-jit-whl-tracy

          # `|| true`: a missing previous install must not fail the step
          # (bash runs with -e; see "Install ttrt whls" above).
          { pip show ttmlir &> /dev/null && pip uninstall -y ttmlir; } || true
          { pip show ttnn-jit &> /dev/null && pip uninstall -y ttnn-jit; } || true
          # --find-links . lets pip resolve the ttmlir dependency from the
          # wheel downloaded just above instead of an index.
          pip install ttnn_jit*.whl --find-links . --upgrade

      - name: Set up tt-triage
        shell: bash
        run: |
          # Pin tt-triage to the tt-metal version declared in third_party/CMakeLists.txt.
          TT_METAL_VERSION=$(grep 'set(TT_METAL_VERSION' third_party/CMakeLists.txt | sed 's/.*"\(.*\)".*/\1/')

          # Extract only the triage tooling from the tt-metal source tarball.
          mkdir -p tt-triage
          curl -L "https://github.com/tenstorrent/tt-metal/archive/${TT_METAL_VERSION}.tar.gz" \
            | tar -xz -C tt-triage --strip-components=1 \
              tt-metal-${TT_METAL_VERSION}/scripts/ttexalens_ref.txt \
              tt-metal-${TT_METAL_VERSION}/tools/tt-triage.py \
              tt-metal-${TT_METAL_VERSION}/tools/triage \
              tt-metal-${TT_METAL_VERSION}/tt_metal

          # On device-operation timeout, run triage and send its report to
          # stderr so hangs leave diagnostics in the job log.
          echo "TT_METAL_OPERATION_TIMEOUT_SECONDS=${{ vars.TT_METAL_OPERATION_TIMEOUT_SECONDS || 300 }}" >> $GITHUB_ENV
          echo "TT_METAL_DISPATCH_TIMEOUT_COMMAND_TO_EXECUTE=python $(pwd)/tt-triage/tools/tt-triage.py 1>&2" >> $GITHUB_ENV

      - name: Set up tracy profiler tools
        shell: bash
        run: |
          # The profiler expects the capture/csvexport binaries at the
          # tt-metal build location; copy them from the ttrt runtime package.
          TRACY_BIN_DIR="${{ steps.strings.outputs.work-dir }}/third_party/tt-metal/src/tt-metal/build/tools/profiler/bin"
          mkdir -p "$TRACY_BIN_DIR"
          cp ${{ steps.strings.outputs.build-output-dir }}/python_packages/ttrt/runtime/capture-release "$TRACY_BIN_DIR/"
          cp ${{ steps.strings.outputs.build-output-dir }}/python_packages/ttrt/runtime/csvexport-release "$TRACY_BIN_DIR/"

      - name: Run JIT perf collection
        shell: bash
        env:
          JOB_ID: ${{ steps.fetch-job-id.outputs.job_id }}
        run: |
          source env/activate
          export PYTHONPATH="${{ steps.strings.outputs.install-output-dir }}/tt-metal/ttnn:${{ steps.strings.outputs.install-output-dir }}/tt-metal"
          export LD_LIBRARY_PATH="${{ steps.strings.outputs.install-output-dir }}/lib:${TTMLIR_TOOLCHAIN_DIR}/lib:${LD_LIBRARY_PATH}"
          export SYSTEM_DESC_PATH="${GITHUB_WORKSPACE}/ttrt-artifacts/system_desc.ttsys"
          export TT_METAL_RUNTIME_ROOT="${{ steps.strings.outputs.install-output-dir }}/tt-metal"
          export TT_METAL_HOME="${{ steps.strings.outputs.work-dir }}/third_party/tt-metal/src/tt-metal"
          # NOTE(review): `build` already exists as a directory at this point
          # (artifacts were downloaded into it above), so this creates
          # build/install rather than replacing build — confirm intended.
          ln -sf ${{ steps.strings.outputs.install-output-dir }} ${{ steps.strings.outputs.build-output-dir }}

          test/ttnn-jit/perf_ci/run_perf_collect.sh ${{ steps.strings.outputs.perf-output-dir }}

      - name: Upload JIT perf reports
        uses: actions/upload-artifact@v4
        # Upload whatever was produced even when collection failed (but not
        # when the job was cancelled — hence not `always()`).
        if: success() || failure()
        with:
          name: jit-perf-reports-${{ steps.fetch-job-id.outputs.job_id }}
          path: ${{ steps.strings.outputs.perf-output-dir }}
          if-no-files-found: warn
8 changes: 8 additions & 0 deletions .github/workflows/schedule-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@ jobs:
test_matrix: ${{ needs.prepare-run.outputs.test_matrix }}
timeout: ${{ fromJson(needs.prepare-run.outputs.test_timeout) }}

jit-perf-test:
needs: [ build-image, release-build ]
uses: ./.github/workflows/call-jit-perf-test.yml
secrets: inherit
with:
docker_image: ${{ needs.build-image.outputs.docker-image }}

Comment on lines +52 to +58

fail-notify:
if: always()
needs:
Expand Down
101 changes: 101 additions & 0 deletions test/ttnn-jit/perf_ci/perf_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# SPDX-FileCopyrightText: (c) 2025 Tenstorrent AI ULC
#
# SPDX-License-Identifier: Apache-2.0

import ttnn
import ttnn_jit
import torch

import pytest

Comment on lines +9 to +10
from op_definitions import abs, exp, add, mul, matmul

# Memory configs exercised for every op, both JIT and non-JIT.
# Only DRAM interleaved is listed: it works for matmul (which requires
# interleaved) and for all elementwise ops. L1 interleaved is deliberately
# excluded — the JIT runtime raises RuntimeError on L1-interleaved inputs
# (submit path), so it cannot be used for a paired JIT-vs-TTNN comparison.
MEMORY_CONFIGS = [
    (ttnn.DRAM_MEMORY_CONFIG, "dram_interleaved"),
]


def is_unary(op):
    """Return True when ``op`` is a single-input (unary) op — abs or exp."""
    return op in (abs, exp)


@pytest.mark.parametrize(
    "h, w",
    [
        (2048, 2048),
    ],
)
@pytest.mark.parametrize(
    "op",
    [
        abs,
        exp,
        add,
        mul,
        matmul,
    ],
    ids=[
        "abs",
        "exp",
        "add",
        "mul",
        "matmul",
    ],
)
@pytest.mark.parametrize(
    "dtype, ttnn_dtype",
    [
        (torch.bfloat16, ttnn.DataType.BFLOAT16),
        (torch.bfloat16, ttnn.DataType.BFLOAT8_B),
    ],
    ids=["bf16", "bfp8"],
)
@pytest.mark.parametrize(
    "memory_config, memory_config_id",
    MEMORY_CONFIGS,
    # cfg_id instead of `id`: avoid shadowing the builtin.
    ids=[cfg_id for _, cfg_id in MEMORY_CONFIGS],
)
@pytest.mark.parametrize(
    "jit_enabled",
    [
        True,
        False,
    ],
)
def test_op_compare(
    h, w, op, dtype, ttnn_dtype, memory_config, memory_config_id, jit_enabled
):
    """Run one op on device, optionally JIT-compiled, so the device profiler
    can capture perf data for this parametrization.

    No numeric comparison is asserted here; the perf harness
    (run_perf_collect.sh) launches each parametrized id in its own
    profiler run and summarizes the collected traces afterwards.
    """
    device = ttnn.open_device(device_id=0)
    try:
        # torch.rand yields [0, 1); scale into [0, 100).
        torch_tensor_a = torch.rand((h, w), dtype=dtype) * 100
        torch_tensor_b = torch.rand((h, w), dtype=dtype) * 100

        input_a = ttnn.from_torch(
            torch_tensor_a,
            dtype=ttnn_dtype,
            layout=ttnn.TILE_LAYOUT,
            device=device,
            memory_config=memory_config,
        )
        input_b = ttnn.from_torch(
            torch_tensor_b,
            dtype=ttnn_dtype,
            layout=ttnn.TILE_LAYOUT,
            device=device,
            memory_config=memory_config,
        )

        function_to_test = (
            ttnn_jit.jit(debug=True, enable_cache=True)(op) if jit_enabled else op
        )
        # Unary ops (abs/exp) take one tensor; the rest take two.
        output_tensor = (
            function_to_test(input_a)
            if is_unary(op)
            else function_to_test(input_a, input_b)
        )

        print(f"output_tensor\n: {output_tensor}")
    finally:
        # Always release the device: a failing op must not leak the handle
        # into the next parametrized test (the original skipped close on
        # exception).
        ttnn.close_device(device)
Comment on lines +99 to +101
77 changes: 77 additions & 0 deletions test/ttnn-jit/perf_ci/run_perf_collect.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#!/usr/bin/env bash
# SPDX-FileCopyrightText: (c) 2025 Tenstorrent AI ULC
#
# SPDX-License-Identifier: Apache-2.0
#
# Run each parametrized test in perf_tests.py under the device profiler (tracy)
# and dump results into a directory per test. Uses TT_METAL_PROFILER_DIR so each
# run writes to a known subdir. At the end, runs summarize_perf_results.py to
# produce one JSON report per test case in OUT_DIR (perf_<op>_<dtype>_<mem>_<JOB_ID>.json).
# Set JOB_ID env var to include the job ID in filenames (required for CI).
#
# Usage:
#   ./test/ttnn-jit/perf_ci/run_perf_collect.sh [OUT_DIR]
#
# Example:
#   ./test/ttnn-jit/perf_ci/run_perf_collect.sh
#   ./test/ttnn-jit/perf_ci/run_perf_collect.sh generated/jit_perf_reports/my_run

# -u catches unset-variable typos, pipefail surfaces failures on the left
# side of pipelines (original used plain -e).
set -euo pipefail

# Script lives in test/ttnn-jit/perf_ci/; go up three levels to repo root.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)"
cd "$REPO_ROOT"

# Activate venv if not already active (${VAR:-} guards against set -u).
if [ -z "${VIRTUAL_ENV:-}" ] && [ -f env/activate ]; then
  # shellcheck source=/dev/null
  source env/activate
fi

OUT_DIR="${1:-generated/jit_perf_reports/run_$(date +%Y%m%d_%H%M%S)}"
mkdir -p "$OUT_DIR"
OUT_DIR="$(cd "$OUT_DIR" && pwd)"  # absolutize

# Collect test ids from perf_tests.py (whatever is parametrized there).
collect_out=$(mktemp)
# trap guarantees the temp file is removed on every exit path, including
# the early-exit error branches below.
trap 'rm -f "$collect_out"' EXIT
if ! pytest test/ttnn-jit/perf_ci/perf_tests.py --collect-only -q >"$collect_out" 2>&1; then
  echo "Error: pytest collect failed:" >&2
  cat "$collect_out" >&2
  exit 1
fi
# mapfile avoids the word-splitting and pathname-expansion hazards of the
# original TESTS=($(...)) form.
mapfile -t TESTS < <(sed -n 's/.*test_op_compare\[\(.*\)\]/\1/p' <"$collect_out")
if [ ${#TESTS[@]} -eq 0 ]; then
  echo "Error: no test_op_compare[*] tests found in test/ttnn-jit/perf_ci/perf_tests.py. Pytest collect output:" >&2
  cat "$collect_out" >&2
  exit 1
fi
echo "Collected ${#TESTS[@]} tests from perf_tests.py"

export TT_METAL_DEVICE_PROFILER=1

for tid in "${TESTS[@]}"; do
  echo "=============================================="
  echo "Running test_op_compare[$tid] ..."
  echo "=============================================="
  # Each test id gets its own profiler output subdirectory.
  export TT_METAL_PROFILER_DIR="$OUT_DIR/$tid"
  mkdir -p "$TT_METAL_PROFILER_DIR"
  # Keep going on failure: partial profiler output is still summarizable.
  if ! python -m tracy -m -r -p "pytest test/ttnn-jit/perf_ci/perf_tests.py::test_op_compare[$tid]"; then
    echo "Warning: test_op_compare[$tid] exited with non-zero status (results may still be present)."
  fi
done

echo ""
echo "Results written under: $OUT_DIR"
echo "Summarizing..."
# Build the optional --job-id flag as an array so it expands to exactly zero
# or two words (the original's unquoted $JOB_ID_ARG relied on word splitting).
JOB_ID_ARGS=()
if [ -n "${JOB_ID:-}" ]; then
  JOB_ID_ARGS=(--job-id "$JOB_ID")
fi
# ${arr[@]+...} guard keeps set -u happy on bash < 4.4 when the array is empty.
if python test/ttnn-jit/perf_ci/summarize_perf_results.py "$OUT_DIR" --output-dir "$OUT_DIR" ${JOB_ID_ARGS[@]+"${JOB_ID_ARGS[@]}"}; then
  echo "Summary reports written to $OUT_DIR"
else
  echo "Warning: summarizer exited with an error (run dir may be partial)." >&2
fi
Loading
Loading