# Add fused bias support for GMM and bias-gradient/accumulate support f… #18
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Aiter CI workflow.
#
# Job graph:
#   check-signal -> define-runners -> standard
#                                  -> multi-gpu (main branch only)
#
# The GPU jobs (standard, multi-gpu) start a container from DOCKER_IMAGE on the
# runner chosen by define-runners, build Aiter inside it, run
# ./.github/scripts/aiter_test.sh, upload latest_test.log and then clean up.
name: Aiter Test

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]  # Triggers on PRs targeting `main`
  workflow_dispatch:

# One concurrency group per workflow + ref. A newer run cancels the run that is
# in progress for the same ref, except on main where cancel-in-progress is false.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

env:
  # Image used by `docker run` in the GPU jobs.
  # NOTE(review): `latest` is a floating tag, so the image can differ between runs.
  DOCKER_IMAGE: "rocm/pytorch:latest"

jobs:
  # Gate job: define-runners needs it, and the GPU jobs need define-runners.
  check-signal:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Download and check signal artifact
        run: ./.github/scripts/check_signal.sh
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # NOTE(review): GITHUB_SHA is also a default runner variable;
          # confirm this explicit assignment is still needed.
          GITHUB_SHA: ${{ github.sha }}

  # Computes the runner label lists (JSON arrays) consumed by the matrices of
  # the `standard` and `multi-gpu` jobs.
  define-runners:
    runs-on: ubuntu-latest
    needs: [check-signal]
    outputs:
      standard_runners: ${{ steps.machines.outputs.standard_runners }}
      multigpu_runners: ${{ steps.machines.outputs.multigpu_runners }}
    steps:
      - name: Define whether runs on MI35X
        id: machines
        env:
          # Passed through the environment so the title is never expanded into
          # the script text. Empty for non-pull_request events.
          PR_TITLE: ${{ github.event.pull_request.title }}
        run: |
          set -euo pipefail
          # NOTE(review): all three branches below emit the same MI355 runner
          # labels although the log messages mention MI325 - confirm the
          # intended labels for each branch.
          #
          # GITHUB_REF is read from the runner environment instead of expanding
          # the github.ref expression into the script, so the ref is always
          # treated as data by the shell.
          if [[ "${GITHUB_REF}" == "refs/heads/main" ]]; then
            echo "It's main branch, running tests on MI325 and MI35X..."
            echo 'standard_runners=["aiter-mi355-1gpu"]' >> "$GITHUB_OUTPUT"
            echo 'multigpu_runners=["aiter-mi355-8gpu"]' >> "$GITHUB_OUTPUT"
          # A here-string is used instead of `echo | grep -q`: with pipefail, an
          # early grep exit could make the pipeline report failure, and echo
          # could misinterpret a title that looks like an option.
          elif grep -qi "mi35x" <<< "${PR_TITLE}"; then
            echo "PR title contains 'MI35X', running tests on MI325 and MI35X..."
            echo 'standard_runners=["aiter-mi355-1gpu"]' >> "$GITHUB_OUTPUT"
            echo 'multigpu_runners=["aiter-mi355-8gpu"]' >> "$GITHUB_OUTPUT"
          else
            echo "Not main branch and PR title does not contain mi35x, only running on MI325..."
            echo 'standard_runners=["aiter-mi355-1gpu"]' >> "$GITHUB_OUTPUT"
            echo 'multigpu_runners=["aiter-mi355-8gpu"]' >> "$GITHUB_OUTPUT"
          fi
          # Print the recorded outputs (the file contents, not the file path).
          cat "$GITHUB_OUTPUT"

      - name: Show output variable
        env:
          # Routed through env so the double quotes inside the JSON arrays are
          # not consumed by the shell when echoed.
          STANDARD_RUNNERS: ${{ steps.machines.outputs.standard_runners }}
          MULTIGPU_RUNNERS: ${{ steps.machines.outputs.multigpu_runners }}
        run: |
          echo "Standard: ${STANDARD_RUNNERS}"
          echo "Multigpu: ${MULTIGPU_RUNNERS}"

  # Test run on each runner listed in `standard_runners`.
  standard:
    needs: define-runners
    strategy:
      fail-fast: false
      matrix:
        runner: ${{ fromJSON(needs.define-runners.outputs.standard_runners) }}
    runs-on: ${{ matrix.runner }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Sync submodules
        run: |
          set -euo pipefail
          # On main, submodules follow their remote tracking branch (--remote);
          # otherwise the commits recorded in the superproject are used.
          if [[ "${GITHUB_REF}" == "refs/heads/main" ]]; then
            echo "It's main branch, syncing latest CK..."
            git submodule sync
            git submodule update --init --recursive --remote --depth 1 --jobs 4
          else
            echo "It's a PR branch, syncing specific CK..."
            git submodule sync
            git submodule update --init --recursive --depth 1 --jobs 4
          fi

      - name: Clean up Rocm processes
        run: |
          ./.github/scripts/clean_up_rocm.sh

      - name: Run the container
        run: |
          set -ex
          echo "Starting container: aiter_test"
          # Use the device flags from the podinfo file when it exists,
          # otherwise fall back to exposing /dev/dri.
          if [ -f "/etc/podinfo/gha-render-devices" ]; then
            DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices)
          else
            DEVICE_FLAG="--device /dev/dri"
          fi
          # DEVICE_FLAG is deliberately left unquoted: it holds complete
          # option words that must be split into separate arguments.
          docker run -dt \
            --device=/dev/kfd $DEVICE_FLAG \
            --shm-size=16G \
            --network=host \
            --group-add "$(getent group render | cut -d: -f3)" \
            --group-add "$(getent group video | cut -d: -f3)" \
            -v "${{ github.workspace }}:/workspace" \
            -w /workspace \
            --name aiter_test \
            ${{ env.DOCKER_IMAGE }}

      - name: Setup pip config
        run: |
          docker exec -u root aiter_test bash -c "pip config set global.default-timeout 60"
          docker exec -u root aiter_test bash -c "pip config set global.retries 10"

      - name: Setup Aiter
        run: |
          set -ex
          echo "Setting up Aiter..."
          docker exec \
            -w /workspace \
            aiter_test \
            bash -c "BUILD_TRITON=0 ./.github/scripts/build_aiter_triton.sh"

      - name: Tests
        run: |
          set -ex
          docker exec \
            -w /workspace \
            aiter_test \
            bash -c "MAX_JOBS=20 ./.github/scripts/aiter_test.sh"

      # The remaining steps use always() so logs are uploaded and the
      # container is removed even when an earlier step failed.
      - name: Upload test logs
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: standard-test-log-${{ matrix.runner }}
          path: latest_test.log

      - name: Cleanup container
        if: always()
        run: |
          docker rm -f aiter_test || true

      - name: Clean up Rocm processes
        if: always()
        run: |
          ./.github/scripts/clean_up_rocm.sh

  # Same flow as `standard`, on the runners listed in `multigpu_runners`, with
  # MULTIGPU=TRUE exported to the test script.
  multi-gpu:
    needs: define-runners
    # only run multi-gpu tests on main branch due to limited multi-gpu resources
    if: github.ref == 'refs/heads/main'
    strategy:
      fail-fast: false
      matrix:
        runner: ${{ fromJSON(needs.define-runners.outputs.multigpu_runners) }}
    runs-on: ${{ matrix.runner }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Sync submodules
        run: |
          set -euo pipefail
          # The job-level `if` limits this job to main, so only the first
          # branch is expected to run; the else branch mirrors `standard`.
          if [[ "${GITHUB_REF}" == "refs/heads/main" ]]; then
            echo "It's main branch, syncing latest CK..."
            git submodule sync
            git submodule update --init --recursive --remote --depth 1 --jobs 4
          else
            echo "It's a PR branch, syncing specific CK..."
            git submodule sync
            git submodule update --init --recursive --depth 1 --jobs 4
          fi

      - name: Clean up Rocm processes
        run: ./.github/scripts/clean_up_rocm.sh

      - name: Run the container
        run: |
          set -ex
          echo "Starting container: aiter_test"
          # Use the device flags from the podinfo file when it exists,
          # otherwise fall back to exposing /dev/dri.
          if [ -f "/etc/podinfo/gha-render-devices" ]; then
            DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices)
          else
            DEVICE_FLAG="--device /dev/dri"
          fi
          # DEVICE_FLAG is deliberately left unquoted: it holds complete
          # option words that must be split into separate arguments.
          docker run -dt \
            --device=/dev/kfd $DEVICE_FLAG \
            --shm-size=16G \
            --network=host \
            --group-add "$(getent group render | cut -d: -f3)" \
            --group-add "$(getent group video | cut -d: -f3)" \
            -v "${{ github.workspace }}:/workspace" \
            -w /workspace \
            --name aiter_test \
            ${{ env.DOCKER_IMAGE }}

      - name: Setup pip config
        run: |
          docker exec -u root aiter_test bash -c "pip config set global.default-timeout 60"
          docker exec -u root aiter_test bash -c "pip config set global.retries 10"

      - name: Setup-Aiter
        run: |
          set -ex
          echo "Setting up Aiter..."
          docker exec \
            -w /workspace \
            aiter_test \
            bash -c "BUILD_TRITON=0 ./.github/scripts/build_aiter_triton.sh"

      - name: Tests
        run: |
          set -ex
          docker exec \
            -e MULTIGPU=TRUE \
            -w /workspace \
            aiter_test \
            bash -c "MAX_JOBS=20 ./.github/scripts/aiter_test.sh"

      # The remaining steps use always() so logs are uploaded and the
      # container is removed even when an earlier step failed.
      - name: Upload test logs
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: multigpu-test-${{ matrix.runner }}
          path: latest_test.log

      - name: Cleanup container
        if: always()
        run: |
          docker rm -f aiter_test || true

      - name: Clean up Rocm processes
        if: always()
        run: |
          ./.github/scripts/clean_up_rocm.sh