From 76ecbd14805cdd4c068fdc5d6887d9cfe404aea9 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Tue, 7 Oct 2025 09:30:46 -0400 Subject: [PATCH 1/6] simplify workflow to abtest --- .../_linux-benchmark-abtest-h100.yml | 125 ------------------ .github/workflows/_linux-benchmark-h100.yml | 86 ++++++++---- .github/workflows/_linux-benchmark-mi350.yml | 106 ++++++++++----- .github/workflows/nightly.yml | 21 +-- 4 files changed, 141 insertions(+), 197 deletions(-) delete mode 100644 .github/workflows/_linux-benchmark-abtest-h100.yml diff --git a/.github/workflows/_linux-benchmark-abtest-h100.yml b/.github/workflows/_linux-benchmark-abtest-h100.yml deleted file mode 100644 index 7522a541a..000000000 --- a/.github/workflows/_linux-benchmark-abtest-h100.yml +++ /dev/null @@ -1,125 +0,0 @@ -name: linux-benchmark-h100 -on: - workflow_call: - secrets: - TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN: - required: True - description: | - Tritonbench Scribe Graph Access Token - inputs: - benchmark_name: - required: True - type: string - description: | - Benchmark name - side_a_triton: - type: string - required: True - default: "triton-lang/triton" - description: | - Triton repository to test on side A, e.g., "triton-lang/triton" - side_a_commit: - type: string - required: True - description: | - Triton commit or tag to test on side A, e.g., "main" - side_b_triton: - type: string - required: True - default: "triton-lang/triton" - description: | - Triton repository to test on side B, e.g., "triton-lang/triton" - side_b_commit: - type: string - required: True - description: | - Triton commit or tag to test on side B, e.g., "main" - -jobs: - linux-benchmark-h100: - if: github.repository_owner == 'meta-pytorch' - runs-on: [gcp-h100-runner] - timeout-minutes: 240 - environment: docker-s3-upload - permissions: - id-token: write - contents: read - env: - SETUP_SCRIPT: "/workspace/setup_instance.sh" - RUNNER_TYPE: "gcp-h100-runner" - JOB_NAME: tritonbench-h100-abtest-${{ inputs.benchmark_name }} - TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN }} - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - steps: - - name: Checkout Tritonbench - uses: actions/checkout@v3 - with: - submodules: recursive - - name: Tune Nvidia GPU - run: | - bash .ci/gpu/tune-gcp-h100.sh - sudo ldconfig - nvidia-smi - - name: Authenticate with AWS - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results - # The max duration enforced by the server side - role-duration-seconds: 18000 - aws-region: us-east-1 - - name: Compile Triton (Side A) - run: | - bash ./.ci/triton/install.sh --repo ${{ inputs.side_a_triton }} --commit ${{ inputs.side_a_commit }} --side a - - name: Benchmark Triton (Side A) - run: | - bash ./.ci/tritonbench/run-benchmark.sh ${{ inputs.benchmark_name }} --conda-env triton-side-a - mkdir -p benchmark-output - cp -r .benchmarks/${{ inputs.benchmark_name }} benchmark-output/triton-side-a - rm -rf .benchmarks || true - - name: Compile Triton (Side B) - run: | - bash ./.ci/triton/install.sh --repo ${{ inputs.side_b_triton }} --commit ${{ inputs.side_b_commit }} --side b - - name: Benchmark Triton (Side B) - run: | - bash ./.ci/tritonbench/run-benchmark.sh ${{ inputs.benchmark_name }} --conda-env triton-side-b - mkdir -p benchmark-output - cp -r ".benchmarks/${{ inputs.benchmark_name }}" benchmark-output/triton-side-b - rm -rf .benchmarks || true 
- - name: Upload result to GH Actions Artifact - uses: actions/upload-artifact@v4 - with: - name: ${{ env.JOB_NAME }} - path: benchmark-output/ - - name: Upload result to Scribe - run: | - . "${SETUP_SCRIPT}" - triton_side_a_json=$(find ./benchmark-output/triton-side-a -name "result.json" | sort -r | head -n 1) - python ./.ci/upload/scribe.py --json ${triton_side_a_json} - triton_side_b_json=$(find ./benchmark-output/triton-side-b -name "result.json" | sort -r | head -n 1) - python ./.ci/upload/scribe.py --json ${triton_side_b_json} - - name: Rewrite Tritonbench json to ClickHouse style - run: | - . "${SETUP_SCRIPT}" - triton_side_a_json=$(find ./benchmark-output/triton-side-a -name "result.json" | sort -r | head -n 1) - python ./.ci/test_infra/oss_ci_benchmark_v3.py --json "${triton_side_a_json}" \ - --output benchmark-output/results/triton-side-a.json - triton_side_b_json=$(find ./benchmark-output/triton-side-b -name "result.json" | sort -r | head -n 1) - python ./.ci/test_infra/oss_ci_benchmark_v3.py --json "${triton_side_b_json}" \ - --output benchmark-output/results/triton-side-b.json - - name: Setup uploader dependencies - run: | - sudo apt-get install -y python3-pip - - name: Upload result to ClickHouse - uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main - with: - benchmark-results-dir: benchmark-output/results - dry-run: false - schema-version: v3 - github-token: ${{ secrets.GITHUB_TOKEN }} - - name: Restore Nvidia GPU - if: always() - run: | - bash .ci/gpu/reset-gcp-h100.sh - sudo ldconfig - nvidia-smi diff --git a/.github/workflows/_linux-benchmark-h100.yml b/.github/workflows/_linux-benchmark-h100.yml index 75e159184..4d7451c81 100644 --- a/.github/workflows/_linux-benchmark-h100.yml +++ b/.github/workflows/_linux-benchmark-h100.yml @@ -7,26 +7,38 @@ on: description: | Tritonbench Scribe Graph Access Token inputs: - benchmark_name: + test_type: required: True type: string - description: | - Benchmark name - conda_env: + descript: | + Type of the test (single or abtest) + benchmark_name: required: True type: string description: | - Conda environment to activate when testing Triton + Benchmark name side_a_triton: - required: False type: string + required: False + default: "triton-lang/triton" description: | - Triton repo name + Triton repository to test on side A, e.g., "triton-lang/triton" side_a_commit: + type: string + required: False + description: | + Triton commit or tag to test on side A, e.g., "main" + side_b_triton: + type: string required: False + default: "triton-lang/triton" + description: | + Triton repository to test on side B, e.g., "triton-lang/triton" + side_b_commit: type: string + required: False description: | - Triton repo commit + Triton commit or tag to test on side B, e.g., "main" jobs: linux-benchmark-h100: @@ -39,9 +51,9 @@ jobs: contents: read env: SETUP_SCRIPT: "/workspace/setup_instance.sh" - CONDA_ENV: ${{ inputs.conda_env }} RUNNER_TYPE: "gcp-h100-runner" - JOB_NAME: tritonbench-h100-${{ inputs.conda_env }}-${{ inputs.benchmark_name }} + JOB_NAME: tritonbench-h100-benchmark-${{ inputs.test_type }}-${{ inputs.benchmark_name }} + TRITONBENCH_SIDE_A_ENV: "triton-main" TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN }} AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -62,18 +74,29 @@ jobs: # The max duration enforced by the server side role-duration-seconds: 18000 aws-region: us-east-1 - - name: Compile Triton (On Demand) + 
- name: Compile Triton on Demand (Side A)
        if: ${{ inputs.side_a_triton && inputs.side_a_commit }}
        run: |
-          bash ./.ci/triton/compile.sh --repo ${{ inputs.side_a_triton }} --commit ${{ inputs.side_a_commit }} --side a
-      - name: Benchmarking
+          bash ./.ci/triton/install.sh --repo ${{ inputs.side_a_triton }} --commit ${{ inputs.side_a_commit }} --side a
+          echo "TRITONBENCH_SIDE_A_ENV=triton-side-a" >> "$GITHUB_ENV"
+      - name: Benchmark Triton (Side A)
        run: |
-          if [ -n "${{ inputs.side_a_triton }}" ] && [ -n "${{ inputs.side_a_commit }}" ]; then
-            bash .ci/tritonbench/run-benchmark.sh ${{ inputs.benchmark_name }} --conda-env triton-side-a
-          else
-            bash .ci/tritonbench/run-benchmark.sh ${{ inputs.benchmark_name }}
-          fi
-          cp -r ".benchmarks/${{ inputs.benchmark_name }}" benchmark-output
+          bash ./.ci/tritonbench/run-benchmark.sh ${{ inputs.benchmark_name }} --conda-env ${TRITONBENCH_SIDE_A_ENV}
+          mkdir -p benchmark-output
+          cp -r .benchmarks/${{ inputs.benchmark_name }} benchmark-output/${TRITONBENCH_SIDE_A_ENV}
+          rm -rf .benchmarks || true
+      - name: Compile Triton on Demand (Side B)
+        if: ${{ inputs.test_type == 'abtest' && inputs.side_b_triton && inputs.side_b_commit }}
+        run: |
+          bash ./.ci/triton/install.sh --repo ${{ inputs.side_b_triton }} --commit ${{ inputs.side_b_commit }} --side b
+          echo "TRITONBENCH_SIDE_B_ENV=triton-side-b" >> "$GITHUB_ENV"
+      - name: Benchmark Triton (Side B)
+        if: ${{ inputs.test_type == 'abtest' && inputs.side_b_triton && inputs.side_b_commit }}
+        run: |
+          bash ./.ci/tritonbench/run-benchmark.sh ${{ inputs.benchmark_name }} --conda-env ${TRITONBENCH_SIDE_B_ENV}
+          mkdir -p benchmark-output
+          cp -r ".benchmarks/${{ inputs.benchmark_name }}" benchmark-output/${TRITONBENCH_SIDE_B_ENV}
+          rm -rf .benchmarks || true
       - name: Upload result to GH Actions Artifact
         uses: actions/upload-artifact@v4
         with:
@@ -82,21 +105,34 @@ jobs:
       - name: Upload result to Scribe
         run: |
           . "${SETUP_SCRIPT}"
-          latest_result_json=$(find ./benchmark-output -name "result.json" | sort -r | head -n 1)
-          python ./.ci/upload/scribe.py --json ${latest_result_json}
+          if [ -n "${TRITONBENCH_SIDE_A_ENV}" ]; then
+            triton_side_a_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_A_ENV} -name "result.json" | sort -r | head -n 1)
+            python ./.ci/upload/scribe.py --json ${triton_side_a_json}
+          fi
+          if [ -n "${TRITONBENCH_SIDE_B_ENV}" ]; then
+            triton_side_b_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_B_ENV} -name "result.json" | sort -r | head -n 1)
+            python ./.ci/upload/scribe.py --json ${triton_side_b_json}
+          fi
       - name: Rewrite Tritonbench json to ClickHouse style
         run: |
           . 
"${SETUP_SCRIPT}" - latest_result_json=$(find ./benchmark-output -name "result.json" | sort -r | head -n 1) - python ./.ci/test_infra/oss_ci_benchmark_v3.py --json ${latest_result_json} \ - --output benchmark-output/results/result.json + if [ -n "${TRITONBENCH_SIDE_A_ENV}"" ]; then + triton_side_a_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_A_ENV} -name "result.json" | sort -r | head -n 1) + python ./.ci/test_infra/oss_ci_benchmark_v3.py --json "${triton_side_a_json}" \ + --output "benchmark-output/clickouse-results/result-${TRITONBENCH_SIDE_A_ENV}.json" + fi + if [ -n "${TRITONBENCH_SIDE_B_ENV}"" ]; then + triton_side_a_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_B_ENV} -name "result.json" | sort -r | head -n 1) + python ./.ci/test_infra/oss_ci_benchmark_v3.py --json "${triton_side_a_json}" \ + --output "benchmark-output/clickouse-results/result-${TRITONBENCH_SIDE_B_ENV}.json" + fi - name: Setup uploader dependencies run: | sudo apt-get install -y python3-pip - name: Upload result to ClickHouse uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main with: - benchmark-results-dir: benchmark-output/results + benchmark-results-dir: benchmark-output/clickouse-results dry-run: false schema-version: v3 github-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/_linux-benchmark-mi350.yml b/.github/workflows/_linux-benchmark-mi350.yml index f769e293f..72ae58f70 100644 --- a/.github/workflows/_linux-benchmark-mi350.yml +++ b/.github/workflows/_linux-benchmark-mi350.yml @@ -7,26 +7,38 @@ on: description: | Tritonbench Scribe Graph Access Token inputs: - benchmark_name: + test_type: required: True type: string - description: | - Benchmark name - conda_env: + descript: | + Type of the test (single or abtest) + benchmark_name: required: True type: string description: | - Conda environment to activate when testing Triton + Benchmark name side_a_triton: - required: False type: string + required: False + default: "triton-lang/triton" description: | - Triton repo name + Triton repository to test on side A, e.g., "triton-lang/triton" side_a_commit: + type: string required: False + description: | + Triton commit or tag to test on side A, e.g., "main" + side_b_triton: type: string + required: False + default: "triton-lang/triton" + description: | + Triton repository to test on side B, e.g., "triton-lang/triton" + side_b_commit: + type: string + required: False description: | - Triton repo commit + Triton commit or tag to test on side B, e.g., "main" jobs: linux-benchmark-mi350: @@ -39,10 +51,10 @@ jobs: contents: read env: SETUP_SCRIPT: "/workspace/setup_instance.sh" - CONDA_ENV: ${{ inputs.conda_env }} RUNNER_TYPE: "amd-mi350-runner" DOCKER_IMAGE: "ghcr.io/meta-pytorch/tritonbench:rocm-latest" - JOB_NAME: tritonbench-mi350-${{ inputs.conda_env }}-${{ inputs.benchmark_name }} + TRITONBENCH_SIDE_A_ENV: "triton-main" + JOB_NAME: tritonbench-mi350-${{ inputs.test_type }}-${{ inputs.benchmark_name }} TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN }} AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -74,7 +86,7 @@ jobs: container_name=$(docker run \ ${GPU_FLAG:-} \ - -e CONDA_ENV \ + -e CONDA_ENV="${TRITONBENCH_SIDE_A_ENV}" \ --ipc=host \ --tty \ --detach \ @@ -89,30 +101,47 @@ jobs: # write container id to env echo "TRITONBENCH_CONTAINER_ID=${container_name}" >> $GITHUB_ENV - - name: Compile Triton (On Demand) + - name: Compile Triton side A (On Demand) if: ${{ 
inputs.side_a_triton && inputs.side_a_commit }}
        run: |
          docker exec -t -w /tmp/workspace "${TRITONBENCH_CONTAINER_ID}" bash -c "
            set -eux
            bash ./.ci/triton/install.sh --repo ${{ inputs.side_a_triton }} --commit ${{ inputs.side_a_commit }} --side a
          "
-      - name: Benchmarking
+          echo "TRITONBENCH_SIDE_A_ENV=triton-side-a" >> "${GITHUB_ENV}"
+      - name: Benchmark Triton (Side A)
        run: |
          set -eux
-          if [ -n "${{ inputs.side_a_triton }}" ] && [ -n "${{ inputs.side_a_commit }}" ]; then
-            docker exec -t -w /tmp/workspace "${TRITONBENCH_CONTAINER_ID}" bash -c "
-              set -eux
-              bash .ci/tritonbench/run-benchmark.sh ${{ inputs.benchmark_name }} --conda-env triton-side-a
-            "
-          else
-            docker exec -t -w /tmp/workspace "${TRITONBENCH_CONTAINER_ID}" bash -c "
-              set -eux
-              bash .ci/tritonbench/run-benchmark.sh ${{ inputs.benchmark_name }}
-            "
-          fi
-          cp -r ".benchmarks/${{ inputs.benchmark_name }}" benchmark-output
+          docker exec -t -w /tmp/workspace "${TRITONBENCH_CONTAINER_ID}" bash -c "
+            set -eux
+            bash .ci/tritonbench/run-benchmark.sh ${{ inputs.benchmark_name }} --conda-env ${TRITONBENCH_SIDE_A_ENV}
+          "
+          cp -r ".benchmarks/${{ inputs.benchmark_name }}" benchmark-output/${TRITONBENCH_SIDE_A_ENV}
+          rm -rf .benchmarks || true
           # post-process result.json
-          latest_result_json=$(find ./benchmark-output -name "result.json" | sort -r | head -n 1)
+          latest_result_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_A_ENV} -name "result.json" | sort -r | head -n 1)
           python3 ./.ci/test_infra/oss_ci_benchmark_v3.py --json ${latest_result_json} \
             --add-github-env --output ${latest_result_json}
+      - name: Compile Triton side B (On Demand)
+        if: ${{ inputs.test_type == 'abtest' && inputs.side_b_triton && inputs.side_b_commit }}
+        run: |
+          docker exec -t -w /tmp/workspace "${TRITONBENCH_CONTAINER_ID}" bash -c "
+            set -eux
+            bash ./.ci/triton/install.sh --repo ${{ inputs.side_b_triton }} --commit ${{ inputs.side_b_commit }} --side b
+          "
+          echo "TRITONBENCH_SIDE_B_ENV=triton-side-b" >> "${GITHUB_ENV}"
+      - name: Benchmark Triton (Side B)
+        if: ${{ inputs.test_type == 'abtest' && inputs.side_b_triton && inputs.side_b_commit }}
+        run: |
+          set -eux
+          docker exec -t -w /tmp/workspace "${TRITONBENCH_CONTAINER_ID}" bash -c "
+            set -eux
+            bash .ci/tritonbench/run-benchmark.sh ${{ inputs.benchmark_name }} --conda-env ${TRITONBENCH_SIDE_B_ENV}
+          "
+          cp -r ".benchmarks/${{ inputs.benchmark_name }}" benchmark-output/${TRITONBENCH_SIDE_B_ENV}
+          rm -rf .benchmarks || true
+          # post-process result.json
+          latest_result_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_B_ENV} -name "result.json" | sort -r | head -n 1)
           python3 ./.ci/test_infra/oss_ci_benchmark_v3.py --json ${latest_result_json} \
             --add-github-env --output ${latest_result_json}
       - name: Upload result to GH Actions Artifact
@@ -122,17 +151,30 @@ jobs:
         path: benchmark-output/
       - name: Upload result to Scribe
         run: |
-          latest_result_json=$(find ./benchmark-output -name "result.json" | sort -r | head -n 1)
-          python3 ./.ci/upload/scribe.py --json ${latest_result_json}
+          if [ -n "${TRITONBENCH_SIDE_A_ENV}" ]; then
+            latest_result_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_A_ENV} -name "result.json" | sort -r | head -n 1)
+            python3 ./.ci/upload/scribe.py --json ${latest_result_json}
+          fi
+          if [ -n "${TRITONBENCH_SIDE_B_ENV}" ]; then
+            latest_result_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_B_ENV} -name "result.json" | sort -r | head -n 1)
+            python3 ./.ci/upload/scribe.py --json ${latest_result_json}
+          fi
       - name: Rewrite Tritonbench json to ClickHouse style
         run: |
-          
latest_result_json=$(find ./benchmark-output -name "result.json" | sort -r | head -n 1) - python3 ./.ci/test_infra/oss_ci_benchmark_v3.py --json ${latest_result_json} \ - --output benchmark-output/results/result.json + if [ -n "${TRITONBENCH_SIDE_A_ENV}" ]; then + latest_result_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_A_ENV} -name "result.json" | sort -r | head -n 1) + python3 ./.ci/test_infra/oss_ci_benchmark_v3.py --json ${latest_result_json} \ + --output benchmark-output/clickhouse-results/result-${TRITONBENCH_SIDE_A_ENV}.json + fi + if [ -n "${TRITONBENCH_SIDE_B_ENV}" ]; then + latest_result_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_B_ENV} -name "result.json" | sort -r | head -n 1) + python3 ./.ci/test_infra/oss_ci_benchmark_v3.py --json ${latest_result_json} \ + --output benchmark-output/clickhouse-results/result-${TRITONBENCH_SIDE_B_ENV}.json + fi - name: Upload result to ClickHouse uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main with: - benchmark-results-dir: benchmark-output/results + benchmark-results-dir: benchmark-output/clickhouse-results dry-run: false schema-version: v3 github-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 6549e4408..f6b8d4c2a 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -32,30 +32,21 @@ on: - .github/workflows/nightly.yml jobs: - h100-triton-main-nightly-periodic: + h100-triton-main-nightly-benchmark: uses: ./.github/workflows/_linux-benchmark-h100.yml - if: ${{ inputs.test_type != 'abtest' }} with: - conda_env: "triton-main" + test_type: ${{ inputs.test_type }} benchmark_name: "nightly" side_a_triton: ${{ inputs.side_a_triton }} side_a_commit: ${{ inputs.side_a_commit }} + side_b_triton: ${{ inputs.side_b_triton }} + side_b_commit: ${{ inputs.side_b_commit }} secrets: TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN }} - mi350-triton-main-nightly-periodic: + mi350-triton-main-nightly-benchmark: uses: ./.github/workflows/_linux-benchmark-mi350.yml - if: ${{ inputs.test_type != 'abtest' }} - with: - conda_env: "triton-main" - benchmark_name: "nightly" - side_a_triton: ${{ inputs.side_a_triton }} - side_a_commit: ${{ inputs.side_a_commit }} - secrets: - TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN }} - h100-triton-nightly-abtest: - uses: ./.github/workflows/_linux-benchmark-abtest-h100.yml - if: ${{ inputs.test_type == 'abtest' }} with: + test_type: ${{ inputs.test_type }} benchmark_name: "nightly" side_a_triton: ${{ inputs.side_a_triton }} side_a_commit: ${{ inputs.side_a_commit }} From 944b75cea288ae16ba60943ab32355f46da13433 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Tue, 7 Oct 2025 10:27:57 -0400 Subject: [PATCH 2/6] fix description --- .github/workflows/_linux-benchmark-h100.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_linux-benchmark-h100.yml b/.github/workflows/_linux-benchmark-h100.yml index 4d7451c81..12d0eb69a 100644 --- a/.github/workflows/_linux-benchmark-h100.yml +++ b/.github/workflows/_linux-benchmark-h100.yml @@ -10,7 +10,7 @@ on: test_type: required: True type: string - descript: | + description: | Type of the test (single or abtest) benchmark_name: required: True From 35c65920b16f5ed85b494e26009e76d68233fed3 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Tue, 7 Oct 2025 10:49:24 -0400 Subject: [PATCH 3/6] other bug fixes --- 
.github/workflows/_linux-benchmark-mi350.yml | 2 +- .github/workflows/compile-time.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_linux-benchmark-mi350.yml b/.github/workflows/_linux-benchmark-mi350.yml index 72ae58f70..e5ced2d73 100644 --- a/.github/workflows/_linux-benchmark-mi350.yml +++ b/.github/workflows/_linux-benchmark-mi350.yml @@ -10,7 +10,7 @@ on: test_type: required: True type: string - descript: | + description: | Type of the test (single or abtest) benchmark_name: required: True diff --git a/.github/workflows/compile-time.yaml b/.github/workflows/compile-time.yaml index 3bce0ec6c..786ab03ca 100644 --- a/.github/workflows/compile-time.yaml +++ b/.github/workflows/compile-time.yaml @@ -35,7 +35,7 @@ jobs: h100-triton-main-compile-time-benchmark: uses: ./.github/workflows/_linux-benchmark-h100.yml with: - conda_env: "triton-main" + test_type: ${{ inputs.test_type }} benchmark_name: "compile_time" side_a_triton: ${{ inputs.side_a_triton }} side_a_commit: ${{ inputs.side_a_commit }} From e64ca58671d15af70bf74c6ba380bc00d9b65357 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Tue, 7 Oct 2025 10:54:09 -0400 Subject: [PATCH 4/6] test --- .github/workflows/_linux-benchmark-h100.yml | 8 ++++---- .github/workflows/_linux-benchmark-mi350.yml | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/_linux-benchmark-h100.yml b/.github/workflows/_linux-benchmark-h100.yml index 12d0eb69a..846952698 100644 --- a/.github/workflows/_linux-benchmark-h100.yml +++ b/.github/workflows/_linux-benchmark-h100.yml @@ -105,23 +105,23 @@ jobs: - name: Upload result to Scribe run: | . "${SETUP_SCRIPT}" - if [ -n "${TRITONBENCH_SIDE_A_ENV}" ]; then + if [[ -n "${TRITONBENCH_SIDE_A_ENV}" ]]; then triton_side_a_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_A_ENV} -name "result.json" | sort -r | head -n 1) python ./.ci/upload/scribe.py --json ${triton_side_a_json} fi - if [ -n "${TRITONBENCH_SIDE_B_ENV}" ]; then + if [[ -n "${TRITONBENCH_SIDE_B_ENV}" ]]; then triton_side_b_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_B_ENV} -name "result.json" | sort -r | head -n 1) python ./.ci/upload/scribe.py --json ${triton_side_b_json} fi - name: Rewrite Tritonbench json to ClickHouse style run: | . 
"${SETUP_SCRIPT}" - if [ -n "${TRITONBENCH_SIDE_A_ENV}"" ]; then + if [[ -n "${TRITONBENCH_SIDE_A_ENV}"" ]]; then triton_side_a_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_A_ENV} -name "result.json" | sort -r | head -n 1) python ./.ci/test_infra/oss_ci_benchmark_v3.py --json "${triton_side_a_json}" \ --output "benchmark-output/clickouse-results/result-${TRITONBENCH_SIDE_A_ENV}.json" fi - if [ -n "${TRITONBENCH_SIDE_B_ENV}"" ]; then + if [[ -n "${TRITONBENCH_SIDE_B_ENV}"" ]]; then triton_side_a_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_B_ENV} -name "result.json" | sort -r | head -n 1) python ./.ci/test_infra/oss_ci_benchmark_v3.py --json "${triton_side_a_json}" \ --output "benchmark-output/clickouse-results/result-${TRITONBENCH_SIDE_B_ENV}.json" diff --git a/.github/workflows/_linux-benchmark-mi350.yml b/.github/workflows/_linux-benchmark-mi350.yml index e5ced2d73..3503853c2 100644 --- a/.github/workflows/_linux-benchmark-mi350.yml +++ b/.github/workflows/_linux-benchmark-mi350.yml @@ -151,22 +151,22 @@ jobs: path: benchmark-output/ - name: Upload result to Scribe run: | - if [ -n "${TRITONBENCH_SIDE_A_ENV}" ]; then + if [[ -n "${TRITONBENCH_SIDE_A_ENV}" ]]; then latest_result_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_A_ENV} -name "result.json" | sort -r | head -n 1) python3 ./.ci/upload/scribe.py --json ${latest_result_json} fi - if [ -n "${TRITONBENCH_SIDE_B_ENV}" ]; then + if [[ -n "${TRITONBENCH_SIDE_B_ENV}" ]]; then latest_result_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_B_ENV} -name "result.json" | sort -r | head -n 1) python3 ./.ci/upload/scribe.py --json ${latest_result_json} fi - name: Rewrite Tritonbench json to ClickHouse style run: | - if [ -n "${TRITONBENCH_SIDE_A_ENV}" ]; then + if [[ -n "${TRITONBENCH_SIDE_A_ENV}" ]]; then latest_result_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_A_ENV} -name "result.json" | sort -r | head -n 1) python3 ./.ci/test_infra/oss_ci_benchmark_v3.py --json ${latest_result_json} \ --output benchmark-output/clickhouse-results/result-${TRITONBENCH_SIDE_A_ENV}.json fi - if [ -n "${TRITONBENCH_SIDE_B_ENV}" ]; then + if [[ -n "${TRITONBENCH_SIDE_B_ENV}" ]]; then latest_result_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_B_ENV} -name "result.json" | sort -r | head -n 1) python3 ./.ci/test_infra/oss_ci_benchmark_v3.py --json ${latest_result_json} \ --output benchmark-output/clickhouse-results/result-${TRITONBENCH_SIDE_B_ENV}.json From 5f5d43e4a916ff1e69031870132aa8505b1df710 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Tue, 7 Oct 2025 10:56:05 -0400 Subject: [PATCH 5/6] bugfix --- .github/workflows/_linux-benchmark-h100.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/_linux-benchmark-h100.yml b/.github/workflows/_linux-benchmark-h100.yml index 846952698..d3cc3af92 100644 --- a/.github/workflows/_linux-benchmark-h100.yml +++ b/.github/workflows/_linux-benchmark-h100.yml @@ -119,12 +119,12 @@ jobs: if [[ -n "${TRITONBENCH_SIDE_A_ENV}"" ]]; then triton_side_a_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_A_ENV} -name "result.json" | sort -r | head -n 1) python ./.ci/test_infra/oss_ci_benchmark_v3.py --json "${triton_side_a_json}" \ - --output "benchmark-output/clickouse-results/result-${TRITONBENCH_SIDE_A_ENV}.json" + --output "benchmark-output/clickhouse-results/result-${TRITONBENCH_SIDE_A_ENV}.json" fi if [[ -n "${TRITONBENCH_SIDE_B_ENV}"" ]]; then triton_side_a_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_B_ENV} -name "result.json" | sort -r | head -n 1) python 
./.ci/test_infra/oss_ci_benchmark_v3.py --json "${triton_side_a_json}" \ - --output "benchmark-output/clickouse-results/result-${TRITONBENCH_SIDE_B_ENV}.json" + --output "benchmark-output/clickhouse-results/result-${TRITONBENCH_SIDE_B_ENV}.json" fi - name: Setup uploader dependencies run: | @@ -132,7 +132,7 @@ jobs: - name: Upload result to ClickHouse uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main with: - benchmark-results-dir: benchmark-output/clickouse-results + benchmark-results-dir: benchmark-output/clickhouse-results dry-run: false schema-version: v3 github-token: ${{ secrets.GITHUB_TOKEN }} From 8359129e7e34c7cb2d83de9720cfc1467b3d9092 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Tue, 7 Oct 2025 16:00:45 -0400 Subject: [PATCH 6/6] fix mi350 --- .github/workflows/_linux-benchmark-mi350.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/_linux-benchmark-mi350.yml b/.github/workflows/_linux-benchmark-mi350.yml index 3503853c2..dc9de8bcd 100644 --- a/.github/workflows/_linux-benchmark-mi350.yml +++ b/.github/workflows/_linux-benchmark-mi350.yml @@ -116,6 +116,7 @@ jobs: set -eux bash .ci/tritonbench/run-benchmark.sh ${{ inputs.benchmark_name }} --conda-env ${TRITONBENCH_SIDE_A_ENV} " + mkdir -p benchmark-output cp -r ".benchmarks/${{ inputs.benchmark_name }}" benchmark-output/${TRITONBENCH_SIDE_A_ENV} rm -rf .benchmarks || true # post-process result.json @@ -138,6 +139,7 @@ jobs: set -eux bash .ci/tritonbench/run-benchmark.sh ${{ inputs.benchmark_name }} --conda-env ${TRITONBENCH_SIDE_B_ENV} " + mkdir -p benchmark-output cp -r ".benchmarks/${{ inputs.benchmark_name }}" benchmark-output/${TRITONBENCH_SIDE_B_ENV} rm -rf .benchmarks || true # post-process result.json