diff --git a/.github/workflows/_linux-benchmark-abtest-h100.yml b/.github/workflows/_linux-benchmark-abtest-h100.yml
deleted file mode 100644
index 7522a541..00000000
--- a/.github/workflows/_linux-benchmark-abtest-h100.yml
+++ /dev/null
@@ -1,125 +0,0 @@
-name: linux-benchmark-h100
-on:
-  workflow_call:
-    secrets:
-      TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN:
-        required: True
-        description: |
-          Tritonbench Scribe Graph Access Token
-    inputs:
-      benchmark_name:
-        required: True
-        type: string
-        description: |
-          Benchmark name
-      side_a_triton:
-        type: string
-        required: True
-        default: "triton-lang/triton"
-        description: |
-          Triton repository to test on side A, e.g., "triton-lang/triton"
-      side_a_commit:
-        type: string
-        required: True
-        description: |
-          Triton commit or tag to test on side A, e.g., "main"
-      side_b_triton:
-        type: string
-        required: True
-        default: "triton-lang/triton"
-        description: |
-          Triton repository to test on side B, e.g., "triton-lang/triton"
-      side_b_commit:
-        type: string
-        required: True
-        description: |
-          Triton commit or tag to test on side B, e.g., "main"
-
-jobs:
-  linux-benchmark-h100:
-    if: github.repository_owner == 'meta-pytorch'
-    runs-on: [gcp-h100-runner]
-    timeout-minutes: 240
-    environment: docker-s3-upload
-    permissions:
-      id-token: write
-      contents: read
-    env:
-      SETUP_SCRIPT: "/workspace/setup_instance.sh"
-      RUNNER_TYPE: "gcp-h100-runner"
-      JOB_NAME: tritonbench-h100-abtest-${{ inputs.benchmark_name }}
-      TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN }}
-      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
-      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-    steps:
-      - name: Checkout Tritonbench
-        uses: actions/checkout@v3
-        with:
-          submodules: recursive
-      - name: Tune Nvidia GPU
-        run: |
-          bash .ci/gpu/tune-gcp-h100.sh
-          sudo ldconfig
-          nvidia-smi
-      - name: Authenticate with AWS
-        uses: aws-actions/configure-aws-credentials@v4
-        with:
-          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
-          # The max duration enforced by the server side
-          role-duration-seconds: 18000
-          aws-region: us-east-1
-      - name: Compile Triton (Side A)
-        run: |
-          bash ./.ci/triton/install.sh --repo ${{ inputs.side_a_triton }} --commit ${{ inputs.side_a_commit }} --side a
-      - name: Benchmark Triton (Side A)
-        run: |
-          bash ./.ci/tritonbench/run-benchmark.sh ${{ inputs.benchmark_name }} --conda-env triton-side-a
-          mkdir -p benchmark-output
-          cp -r .benchmarks/${{ inputs.benchmark_name }} benchmark-output/triton-side-a
-          rm -rf .benchmarks || true
-      - name: Compile Triton (Side B)
-        run: |
-          bash ./.ci/triton/install.sh --repo ${{ inputs.side_b_triton }} --commit ${{ inputs.side_b_commit }} --side b
-      - name: Benchmark Triton (Side B)
-        run: |
-          bash ./.ci/tritonbench/run-benchmark.sh ${{ inputs.benchmark_name }} --conda-env triton-side-b
-          mkdir -p benchmark-output
-          cp -r ".benchmarks/${{ inputs.benchmark_name }}" benchmark-output/triton-side-b
-          rm -rf .benchmarks || true
-      - name: Upload result to GH Actions Artifact
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ env.JOB_NAME }}
-          path: benchmark-output/
-      - name: Upload result to Scribe
-        run: |
-          . "${SETUP_SCRIPT}"
-          triton_side_a_json=$(find ./benchmark-output/triton-side-a -name "result.json" | sort -r | head -n 1)
-          python ./.ci/upload/scribe.py --json ${triton_side_a_json}
-          triton_side_b_json=$(find ./benchmark-output/triton-side-b -name "result.json" | sort -r | head -n 1)
-          python ./.ci/upload/scribe.py --json ${triton_side_b_json}
-      - name: Rewrite Tritonbench json to ClickHouse style
-        run: |
-          . "${SETUP_SCRIPT}"
-          triton_side_a_json=$(find ./benchmark-output/triton-side-a -name "result.json" | sort -r | head -n 1)
-          python ./.ci/test_infra/oss_ci_benchmark_v3.py --json "${triton_side_a_json}" \
-            --output benchmark-output/results/triton-side-a.json
-          triton_side_b_json=$(find ./benchmark-output/triton-side-b -name "result.json" | sort -r | head -n 1)
-          python ./.ci/test_infra/oss_ci_benchmark_v3.py --json "${triton_side_b_json}" \
-            --output benchmark-output/results/triton-side-b.json
-      - name: Setup uploader dependencies
-        run: |
-          sudo apt-get install -y python3-pip
-      - name: Upload result to ClickHouse
-        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
-        with:
-          benchmark-results-dir: benchmark-output/results
-          dry-run: false
-          schema-version: v3
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-      - name: Restore Nvidia GPU
-        if: always()
-        run: |
-          bash .ci/gpu/reset-gcp-h100.sh
-          sudo ldconfig
-          nvidia-smi
diff --git a/.github/workflows/_linux-benchmark-h100.yml b/.github/workflows/_linux-benchmark-h100.yml
index 75e15918..d3cc3af9 100644
--- a/.github/workflows/_linux-benchmark-h100.yml
+++ b/.github/workflows/_linux-benchmark-h100.yml
@@ -7,26 +7,38 @@
         description: |
           Tritonbench Scribe Graph Access Token
     inputs:
-      benchmark_name:
+      test_type:
         required: True
         type: string
         description: |
-          Benchmark name
-      conda_env:
+          Type of the test (single or abtest)
+      benchmark_name:
         required: True
         type: string
         description: |
-          Conda environment to activate when testing Triton
+          Benchmark name
       side_a_triton:
-        required: False
         type: string
+        required: False
+        default: "triton-lang/triton"
         description: |
-          Triton repo name
+          Triton repository to test on side A, e.g., "triton-lang/triton"
       side_a_commit:
+        type: string
         required: False
+        description: |
+          Triton commit or tag to test on side A, e.g., "main"
+      side_b_triton:
         type: string
+        required: False
+        default: "triton-lang/triton"
         description: |
-          Triton repo commit
+          Triton repository to test on side B, e.g., "triton-lang/triton"
+      side_b_commit:
+        type: string
+        required: False
+        description: |
+          Triton commit or tag to test on side B, e.g., "main"
 
 jobs:
   linux-benchmark-h100:
@@ -39,9 +51,9 @@
       contents: read
     env:
       SETUP_SCRIPT: "/workspace/setup_instance.sh"
-      CONDA_ENV: ${{ inputs.conda_env }}
       RUNNER_TYPE: "gcp-h100-runner"
-      JOB_NAME: tritonbench-h100-${{ inputs.conda_env }}-${{ inputs.benchmark_name }}
+      JOB_NAME: tritonbench-h100-benchmark-${{ inputs.test_type }}-${{ inputs.benchmark_name }}
+      TRITONBENCH_SIDE_A_ENV: "triton-main"
       TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN }}
       AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
       AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
@@ -62,18 +74,29 @@
         # The max duration enforced by the server side
         role-duration-seconds: 18000
         aws-region: us-east-1
-      - name: Compile Triton (On Demand)
+      - name: Compile Triton on Demand (Side A)
         if: ${{ inputs.side_a_triton && inputs.side_a_commit }}
         run: |
-          bash ./.ci/triton/compile.sh --repo ${{ inputs.side_a_triton }} --commit ${{ inputs.side_a_commit }} --side a
-      - name: Benchmarking
+          bash ./.ci/triton/install.sh --repo ${{ inputs.side_a_triton }} --commit ${{ inputs.side_a_commit }} --side a
+          echo 'TRITONBENCH_SIDE_A_ENV=triton-side-a' >> "${GITHUB_ENV}"
+      - name: Benchmark Triton (Side A)
         run: |
-          if [ -n "${{ inputs.side_a_triton }}" ] && [ -n "${{ inputs.side_a_commit }}" ]; then
-            bash .ci/tritonbench/run-benchmark.sh ${{ inputs.benchmark_name }} --conda-env triton-side-a
-          else
-            bash .ci/tritonbench/run-benchmark.sh ${{ inputs.benchmark_name }}
-          fi
-          cp -r ".benchmarks/${{ inputs.benchmark_name }}" benchmark-output
+          bash ./.ci/tritonbench/run-benchmark.sh ${{ inputs.benchmark_name }} --conda-env ${TRITONBENCH_SIDE_A_ENV}
+          mkdir -p benchmark-output
+          cp -r .benchmarks/${{ inputs.benchmark_name }} benchmark-output/${TRITONBENCH_SIDE_A_ENV}
+          rm -rf .benchmarks || true
+      - name: Compile Triton on Demand (Side B)
+        if: ${{ inputs.test_type == 'abtest' && inputs.side_b_triton && inputs.side_b_commit }}
+        run: |
+          bash ./.ci/triton/install.sh --repo ${{ inputs.side_b_triton }} --commit ${{ inputs.side_b_commit }} --side b
+          echo 'TRITONBENCH_SIDE_B_ENV=triton-side-b' >> "${GITHUB_ENV}"
+      - name: Benchmark Triton (Side B)
+        if: ${{ inputs.test_type == 'abtest' && inputs.side_b_triton && inputs.side_b_commit }}
+        run: |
+          bash ./.ci/tritonbench/run-benchmark.sh ${{ inputs.benchmark_name }} --conda-env ${TRITONBENCH_SIDE_B_ENV}
+          mkdir -p benchmark-output
+          cp -r ".benchmarks/${{ inputs.benchmark_name }}" benchmark-output/${TRITONBENCH_SIDE_B_ENV}
+          rm -rf .benchmarks || true
       - name: Upload result to GH Actions Artifact
        uses: actions/upload-artifact@v4
         with:
@@ -82,21 +105,34 @@
       - name: Upload result to Scribe
         run: |
           . "${SETUP_SCRIPT}"
-          latest_result_json=$(find ./benchmark-output -name "result.json" | sort -r | head -n 1)
-          python ./.ci/upload/scribe.py --json ${latest_result_json}
+          if [[ -n "${TRITONBENCH_SIDE_A_ENV}" ]]; then
+            triton_side_a_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_A_ENV} -name "result.json" | sort -r | head -n 1)
+            python ./.ci/upload/scribe.py --json ${triton_side_a_json}
+          fi
+          if [[ -n "${TRITONBENCH_SIDE_B_ENV}" ]]; then
+            triton_side_b_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_B_ENV} -name "result.json" | sort -r | head -n 1)
+            python ./.ci/upload/scribe.py --json ${triton_side_b_json}
+          fi
       - name: Rewrite Tritonbench json to ClickHouse style
         run: |
           . "${SETUP_SCRIPT}"
-          latest_result_json=$(find ./benchmark-output -name "result.json" | sort -r | head -n 1)
-          python ./.ci/test_infra/oss_ci_benchmark_v3.py --json ${latest_result_json} \
-            --output benchmark-output/results/result.json
+          if [[ -n "${TRITONBENCH_SIDE_A_ENV}" ]]; then
+            triton_side_a_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_A_ENV} -name "result.json" | sort -r | head -n 1)
+            python ./.ci/test_infra/oss_ci_benchmark_v3.py --json "${triton_side_a_json}" \
+              --output "benchmark-output/clickhouse-results/result-${TRITONBENCH_SIDE_A_ENV}.json"
+          fi
+          if [[ -n "${TRITONBENCH_SIDE_B_ENV}" ]]; then
+            triton_side_b_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_B_ENV} -name "result.json" | sort -r | head -n 1)
+            python ./.ci/test_infra/oss_ci_benchmark_v3.py --json "${triton_side_b_json}" \
+              --output "benchmark-output/clickhouse-results/result-${TRITONBENCH_SIDE_B_ENV}.json"
+          fi
       - name: Setup uploader dependencies
         run: |
           sudo apt-get install -y python3-pip
       - name: Upload result to ClickHouse
         uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
         with:
-          benchmark-results-dir: benchmark-output/results
+          benchmark-results-dir: benchmark-output/clickhouse-results
           dry-run: false
           schema-version: v3
           github-token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/_linux-benchmark-mi350.yml b/.github/workflows/_linux-benchmark-mi350.yml
index f769e293..dc9de8bc 100644
--- a/.github/workflows/_linux-benchmark-mi350.yml
+++ b/.github/workflows/_linux-benchmark-mi350.yml
@@ -7,26 +7,38 @@
         description: |
           Tritonbench Scribe Graph Access Token
     inputs:
-      benchmark_name:
+      test_type:
         required: True
         type: string
         description: |
-          Benchmark name
-      conda_env:
+          Type of the test (single or abtest)
+      benchmark_name:
         required: True
         type: string
         description: |
-          Conda environment to activate when testing Triton
+          Benchmark name
       side_a_triton:
-        required: False
         type: string
+        required: False
+        default: "triton-lang/triton"
         description: |
-          Triton repo name
+          Triton repository to test on side A, e.g., "triton-lang/triton"
       side_a_commit:
+        type: string
         required: False
+        description: |
+          Triton commit or tag to test on side A, e.g., "main"
+      side_b_triton:
         type: string
+        required: False
+        default: "triton-lang/triton"
         description: |
-          Triton repo commit
+          Triton repository to test on side B, e.g., "triton-lang/triton"
+      side_b_commit:
+        type: string
+        required: False
+        description: |
+          Triton commit or tag to test on side B, e.g., "main"
 
 jobs:
   linux-benchmark-mi350:
@@ -39,10 +51,10 @@
       contents: read
     env:
       SETUP_SCRIPT: "/workspace/setup_instance.sh"
-      CONDA_ENV: ${{ inputs.conda_env }}
       RUNNER_TYPE: "amd-mi350-runner"
       DOCKER_IMAGE: "ghcr.io/meta-pytorch/tritonbench:rocm-latest"
-      JOB_NAME: tritonbench-mi350-${{ inputs.conda_env }}-${{ inputs.benchmark_name }}
+      TRITONBENCH_SIDE_A_ENV: "triton-main"
+      JOB_NAME: tritonbench-mi350-${{ inputs.test_type }}-${{ inputs.benchmark_name }}
       TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN }}
       AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
       AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
@@ -74,7 +86,7 @@
 
         container_name=$(docker run \
           ${GPU_FLAG:-} \
-          -e CONDA_ENV \
+          -e CONDA_ENV="${TRITONBENCH_SIDE_A_ENV}" \
          --ipc=host \
           --tty \
           --detach \
@@ -89,30 +101,49 @@
         # write container id to env
         echo "TRITONBENCH_CONTAINER_ID=${container_name}" >> $GITHUB_ENV
-      - name: Compile Triton (On Demand)
+      - name: Compile Triton side A (On Demand)
         if: ${{ inputs.side_a_triton && inputs.side_a_commit }}
         run: |
           docker exec -t -w /tmp/workspace "${TRITONBENCH_CONTAINER_ID}" bash -c "
             set -eux
             bash ./.ci/triton/install.sh --repo ${{ inputs.side_a_triton }} --commit ${{ inputs.side_a_commit }} --side a
           "
-      - name: Benchmarking
+          echo 'TRITONBENCH_SIDE_A_ENV=triton-side-a' >> "${GITHUB_ENV}"
+      - name: Benchmark Triton (Side A)
         run: |
           set -eux
-          if [ -n "${{ inputs.side_a_triton }}" ] && [ -n "${{ inputs.side_a_commit }}" ]; then
-            docker exec -t -w /tmp/workspace "${TRITONBENCH_CONTAINER_ID}" bash -c "
-              set -eux
-              bash .ci/tritonbench/run-benchmark.sh ${{ inputs.benchmark_name }} --conda-env triton-side-a
-            "
-          else
-            docker exec -t -w /tmp/workspace "${TRITONBENCH_CONTAINER_ID}" bash -c "
-              set -eux
-              bash .ci/tritonbench/run-benchmark.sh ${{ inputs.benchmark_name }}
-            "
-          fi
-          cp -r ".benchmarks/${{ inputs.benchmark_name }}" benchmark-output
+          docker exec -t -w /tmp/workspace "${TRITONBENCH_CONTAINER_ID}" bash -c "
+            set -eux
+            bash .ci/tritonbench/run-benchmark.sh ${{ inputs.benchmark_name }} --conda-env ${TRITONBENCH_SIDE_A_ENV}
+          "
+          mkdir -p benchmark-output
+          cp -r ".benchmarks/${{ inputs.benchmark_name }}" benchmark-output/${TRITONBENCH_SIDE_A_ENV}
+          rm -rf .benchmarks || true
           # post-process result.json
-          latest_result_json=$(find ./benchmark-output -name "result.json" | sort -r | head -n 1)
+          latest_result_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_A_ENV} -name "result.json" | sort -r | head -n 1)
           python3 ./.ci/test_infra/oss_ci_benchmark_v3.py --json ${latest_result_json} \
             --add-github-env --output ${latest_result_json}
+      - name: Compile Triton side B (On Demand)
+        if: ${{ inputs.test_type == 'abtest' && inputs.side_b_triton && inputs.side_b_commit }}
+        run: |
+          docker exec -t -w /tmp/workspace "${TRITONBENCH_CONTAINER_ID}" bash -c "
+            set -eux
+            bash ./.ci/triton/install.sh --repo ${{ inputs.side_b_triton }} --commit ${{ inputs.side_b_commit }} --side b
+          "
+          echo 'TRITONBENCH_SIDE_B_ENV=triton-side-b' >> "${GITHUB_ENV}"
+      - name: Benchmark Triton (Side B)
+        if: ${{ inputs.test_type == 'abtest' && inputs.side_b_triton && inputs.side_b_commit }}
+        run: |
+          set -eux
+          docker exec -t -w /tmp/workspace "${TRITONBENCH_CONTAINER_ID}" bash -c "
+            set -eux
+            bash .ci/tritonbench/run-benchmark.sh ${{ inputs.benchmark_name }} --conda-env ${TRITONBENCH_SIDE_B_ENV}
+          "
+          mkdir -p benchmark-output
+          cp -r ".benchmarks/${{ inputs.benchmark_name }}" benchmark-output/${TRITONBENCH_SIDE_B_ENV}
+          rm -rf .benchmarks || true
+          # post-process result.json
+          latest_result_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_B_ENV} -name "result.json" | sort -r | head -n 1)
+          python3 ./.ci/test_infra/oss_ci_benchmark_v3.py --json ${latest_result_json} \
+            --add-github-env --output ${latest_result_json}
       - name: Upload result to GH Actions Artifact
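Note: every run: block is a separate shell, so a plain export would not survive into later steps; that is why the side env names are handed off through the $GITHUB_ENV file (`echo KEY=value >> "${GITHUB_ENV}"`), which GitHub Actions parses between steps. A minimal sketch of the mechanism:

    # in one step: persist the value for the rest of the job
    echo 'TRITONBENCH_SIDE_B_ENV=triton-side-b' >> "${GITHUB_ENV}"
    # in any later step of the same job: read it back as an ordinary env var
    echo "${TRITONBENCH_SIDE_B_ENV}"   # prints: triton-side-b
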
@@ -122,17 +153,30 @@
           path: benchmark-output/
       - name: Upload result to Scribe
         run: |
-          latest_result_json=$(find ./benchmark-output -name "result.json" | sort -r | head -n 1)
-          python3 ./.ci/upload/scribe.py --json ${latest_result_json}
+          if [[ -n "${TRITONBENCH_SIDE_A_ENV}" ]]; then
+            latest_result_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_A_ENV} -name "result.json" | sort -r | head -n 1)
+            python3 ./.ci/upload/scribe.py --json ${latest_result_json}
+          fi
+          if [[ -n "${TRITONBENCH_SIDE_B_ENV}" ]]; then
+            latest_result_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_B_ENV} -name "result.json" | sort -r | head -n 1)
+            python3 ./.ci/upload/scribe.py --json ${latest_result_json}
+          fi
       - name: Rewrite Tritonbench json to ClickHouse style
         run: |
-          latest_result_json=$(find ./benchmark-output -name "result.json" | sort -r | head -n 1)
-          python3 ./.ci/test_infra/oss_ci_benchmark_v3.py --json ${latest_result_json} \
-            --output benchmark-output/results/result.json
+          if [[ -n "${TRITONBENCH_SIDE_A_ENV}" ]]; then
+            latest_result_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_A_ENV} -name "result.json" | sort -r | head -n 1)
+            python3 ./.ci/test_infra/oss_ci_benchmark_v3.py --json ${latest_result_json} \
+              --output benchmark-output/clickhouse-results/result-${TRITONBENCH_SIDE_A_ENV}.json
+          fi
+          if [[ -n "${TRITONBENCH_SIDE_B_ENV}" ]]; then
+            latest_result_json=$(find ./benchmark-output/${TRITONBENCH_SIDE_B_ENV} -name "result.json" | sort -r | head -n 1)
+            python3 ./.ci/test_infra/oss_ci_benchmark_v3.py --json ${latest_result_json} \
+              --output benchmark-output/clickhouse-results/result-${TRITONBENCH_SIDE_B_ENV}.json
+          fi
       - name: Upload result to ClickHouse
         uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
         with:
-          benchmark-results-dir: benchmark-output/results
+          benchmark-results-dir: benchmark-output/clickhouse-results
           dry-run: false
           schema-version: v3
           github-token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/compile-time.yaml b/.github/workflows/compile-time.yaml
index 3bce0ec6..786ab03c 100644
--- a/.github/workflows/compile-time.yaml
+++ b/.github/workflows/compile-time.yaml
@@ -35,7 +35,7 @@ jobs:
   h100-triton-main-compile-time-benchmark:
     uses: ./.github/workflows/_linux-benchmark-h100.yml
     with:
-      conda_env: "triton-main"
+      test_type: ${{ inputs.test_type }}
       benchmark_name: "compile_time"
       side_a_triton: ${{ inputs.side_a_triton }}
       side_a_commit: ${{ inputs.side_a_commit }}
diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
index 6549e440..f6b8d4c2 100644
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@@ -32,30 +32,21 @@
       - .github/workflows/nightly.yml
 
 jobs:
-  h100-triton-main-nightly-periodic:
+  h100-triton-main-nightly-benchmark:
     uses: ./.github/workflows/_linux-benchmark-h100.yml
-    if: ${{ inputs.test_type != 'abtest' }}
     with:
-      conda_env: "triton-main"
+      test_type: ${{ inputs.test_type }}
       benchmark_name: "nightly"
       side_a_triton: ${{ inputs.side_a_triton }}
       side_a_commit: ${{ inputs.side_a_commit }}
+      side_b_triton: ${{ inputs.side_b_triton }}
+      side_b_commit: ${{ inputs.side_b_commit }}
     secrets:
       TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN }}
-  mi350-triton-main-nightly-periodic:
+  mi350-triton-main-nightly-benchmark:
     uses: ./.github/workflows/_linux-benchmark-mi350.yml
-    if: ${{ inputs.test_type != 'abtest' }}
-    with:
-      conda_env: "triton-main"
-      benchmark_name: "nightly"
-      side_a_triton: ${{ inputs.side_a_triton }}
-      side_a_commit: ${{ inputs.side_a_commit }}
-    secrets:
-      TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN }}
-  h100-triton-nightly-abtest:
-    uses: ./.github/workflows/_linux-benchmark-abtest-h100.yml
-    if: ${{ inputs.test_type == 'abtest' }}
     with:
+      test_type: ${{ inputs.test_type }}
       benchmark_name: "nightly"
       side_a_triton: ${{ inputs.side_a_triton }}
       side_a_commit: ${{ inputs.side_a_commit }}
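Note: each side copies its output into benchmark-output/<side-env> and then clears .benchmarks, so the two sides never see each other's results, and the newest result.json per side is picked by reverse-lexicographic sort (this assumes run directories carry timestamp-like names that sort newest-first). Both converted files land under benchmark-output/clickhouse-results/, so a single upload-benchmark-results invocation ingests the pair. For a quick local A/B glance at the converted output before upload, a sketch assuming the abtest defaults (jq -S normalizes key order):

    diff <(jq -S . benchmark-output/clickhouse-results/result-triton-side-a.json) \
         <(jq -S . benchmark-output/clickhouse-results/result-triton-side-b.json)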