Nightly GPU Tests #48
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright 2026 Tensor Auto Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Nightly GPU test workflow: triggers, run serialization, default token
# permissions, and environment variables shared by every job.
name: Nightly GPU Tests

on:
  schedule:
    # GitHub evaluates cron expressions in UTC. 10:00 UTC is 2:00 AM PST
    # (3:00 AM local time while daylight saving time, PDT, is in effect).
    - cron: '0 10 * * *'
  # Also allow manual runs from the Actions tab.
  workflow_dispatch:

# Serialize runs of this workflow. The jobs scale one shared Auto Scaling
# Group up before the tests and back down to zero afterwards, so two
# overlapping runs (e.g. a manual dispatch during the nightly run) could have
# one run shut the GPU instance down while the other is still testing.
# cancel-in-progress is false so a run in progress always reaches its
# scale-down job; a newer run waits for it instead.
concurrency:
  group: nightly-gpu-tests
  cancel-in-progress: false

# Default GITHUB_TOKEN permissions for jobs that do not declare their own.
permissions:
  contents: read  # Required for actions/checkout

# Workflow-wide environment: ask MuJoCo and PyOpenGL to use the EGL backend
# (presumably for headless rendering on the GPU runner -- confirm with the
# test suite's requirements).
env:
  MUJOCO_GL: "egl"
  PYOPENGL_PLATFORM: "egl"
# Three-stage pipeline:
#   1. start-runner  scales the runner Auto Scaling Group up to one instance,
#   2. gpu-test      runs the GPU-marked pytest suite on that runner,
#   3. stop-runner   scales the group back to zero, whatever the test result.
jobs:
  start-runner:
    name: Start GPU Runner
    runs-on: ubuntu-latest
    # Job-level permissions replace the workflow-level defaults.
    permissions:
      id-token: write  # Required for requesting the JWT
      contents: read
    steps:
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
          aws-region: us-west-2

      - name: Start Instance
        # NOTE: this step only requests the capacity change and returns; it
        # does not poll for instance health. The message below is purely
        # informational. The gpu-test job stays queued until a runner with
        # the matching label comes online.
        run: |
          aws autoscaling set-desired-capacity --auto-scaling-group-name github-runner-asg-g6-2xlarge --desired-capacity 1
          echo "Waiting for instance to be ready..."

  gpu-test:
    name: Run Pytest on GPU
    needs: start-runner
    # Runner label; presumably carried by the instances launched from the
    # github-runner-asg-g6-2xlarge group -- confirm in the runner setup.
    runs-on: [g6.2xlarge]
    timeout-minutes: 30
    container:
      image: nvidia/cuda:12.2.0-devel-ubuntu22.04
      options: --gpus all
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Do not leave the token in the local git config after checkout.
          persist-credentials: false

      - name: Install system dependencies
        env:
          # The container has no terminal: keep debconf from opening
          # interactive prompts (e.g. tzdata) that would stall or fail the
          # install.
          DEBIAN_FRONTEND: noninteractive
        run: |
          apt-get update && apt-get install -y \
            python3 python3-pip git ffmpeg \
            libegl1 libegl-mesa0 libegl-dev libgl1 libglx-mesa0 libgles2 mesa-utils \
            curl cmake build-essential

      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          version: "latest"

      - name: Install dependencies
        # Creates the project environment in .venv, which later steps activate.
        run: |
          uv sync --extra dev --extra libero

      - name: Check GPU
        # Fails fast if the GPU is not visible inside the container.
        run: nvidia-smi

      - name: Set up HuggingFace authentication
        # bash is requested explicitly because the script uses `source`.
        shell: bash
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          source .venv/bin/activate
          # Quoted so the token is passed as a single, unexpanded argument.
          hf auth login --token "$HF_TOKEN"

      - name: Run Tests
        shell: bash
        run: |
          source .venv/bin/activate
          mkdir -p /tmp/libero-assets/libero/libero
          export LIBERO_CONFIG_PATH="$(pwd)/.github/assets/libero"
          # Only tests marked "gpu"; -n 0 disables pytest-xdist parallelism.
          pytest -m "gpu" -n 0 -v tests/

  stop-runner:
    name: Stop GPU Runner
    needs: [start-runner, gpu-test]
    if: always()  # Run even if tests fail
    runs-on: ubuntu-latest
    # Job-level permissions replace the workflow-level defaults.
    permissions:
      id-token: write  # Required for requesting the JWT
      contents: read
    steps:
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
          aws-region: us-west-2

      - name: Stop Instance
        # Scale the group back to zero so no GPU instance is left running.
        run: |
          aws autoscaling set-desired-capacity --auto-scaling-group-name github-runner-asg-g6-2xlarge --desired-capacity 0