# Initial commit - cut over from private repository #1
# Note from the hosting page: this file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
---
# Workflow overview (single job, steps run in order):
#   1. Authenticate to AWS via OIDC and run `terraform init/plan/apply`.
#   2. Install kubectl, Helm and yq.
#   3. Write the RunAI key file (only when RUNAI_TOKEN is configured), then run
#      ./post-tf-setup.sh, which is handed the path to that key file.
#   4. Run ./diagnose-cluster.sh and archive ./cluster-diagnostics/.
#   5. Post a Slack notification for success or failure.
#   6. Always run `terraform destroy`, even when an earlier step failed.
name: DataStax NVIDIA Test Matrix CI

on:
  push:
    branches: [main]
  workflow_dispatch:  # Allow manual triggers

jobs:
  terraform-apply-and-test:
    runs-on: ubuntu-latest
    environment: production
    permissions:
      id-token: write  # Required for AWS OIDC authentication
      contents: read
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
          aws-region: us-west-2

      - name: Setup Terraform
        uses: hashicorp/setup-terraform@v2
        with:
          # Quoted so the version is always read as a string.
          terraform_version: '1.9.0'

      - name: Terraform Init
        run: terraform init

      - name: Terraform Plan
        run: terraform plan -out=tfplan

      # Applies exactly the plan file saved by the previous step.
      - name: Terraform Apply
        run: terraform apply -auto-approve tfplan

      - name: Install kubectl
        uses: azure/setup-kubectl@v3
        with:
          version: 'latest'

      - name: Install Helm
        uses: azure/setup-helm@v3
        with:
          version: 'latest'

      - name: Install yq
        run: |
          wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/local/bin/yq
          chmod +x /usr/local/bin/yq

      # Add the RunAI key if RunAI is being used.
      # This step must run BEFORE the post-setup script, because that script
      # receives RUNAI_KEY_PATH pointing at the file written here.
      # The "is RunAI configured?" check is done in the shell: secrets cannot
      # be read directly in an `if:` expression, and RUNAI_TOKEN is not
      # defined at job level for the `env` context to see.
      # Secrets are passed through `env:` rather than interpolated into the
      # script text, so special characters in the key cannot alter the command.
      - name: Setup RunAI Key
        env:
          RUNAI_TOKEN: ${{ secrets.RUNAI_TOKEN }}
          RUNAI_KEY_CONTENT: ${{ secrets.RUNAI_KEY_CONTENT }}
          # Must match RUNAI_KEY_PATH in the "Run post-setup script" step.
          RUNAI_KEY_PATH: "/tmp/runai.key"
        run: |
          if [ -z "${RUNAI_TOKEN}" ]; then
            echo "RUNAI_TOKEN is not set; skipping RunAI key setup."
            exit 0
          fi
          # Restrict permissions before the file is created.
          umask 077
          printf '%s\n' "${RUNAI_KEY_CONTENT}" > "${RUNAI_KEY_PATH}"
          chmod 600 "${RUNAI_KEY_PATH}"

      - name: Run post-setup script
        run: |
          chmod +x ./post-tf-setup.sh
          ./post-tf-setup.sh
        env:
          AWS_REGION: us-west-2
          CLUSTER_NAME: dbost
          GPU_OPERATOR_VERSION: v25.3.0
          RUNAI_TOKEN: ${{ secrets.RUNAI_TOKEN }}
          RUNAI_DOMAIN: ${{ secrets.RUNAI_DOMAIN }}
          RUNAI_KEY_PATH: "/tmp/runai.key"
          RUNAI_CLIENT_SECRET: ${{ secrets.RUNAI_CLIENT_SECRET }}
          RUNAI_UID: ${{ secrets.RUNAI_UID }}
          NGC_KEY: ${{ secrets.NGC_KEY }}

      - name: Run diagnostics
        run: |
          chmod +x ./diagnose-cluster.sh
          ./diagnose-cluster.sh

      # upload-artifact v3 has been retired on github.com; v4 accepts the same
      # `name` and `path` inputs used here.
      - name: Archive diagnostic results
        uses: actions/upload-artifact@v4
        with:
          name: cluster-diagnostics
          path: |
            ./cluster-diagnostics/

      - name: Run application tests
        run: |
          # Add your application-specific tests here
          # Example: Deploy and test NVIDIA NeMo or DataStax SIA workloads
          echo "Running application tests..."

      - name: Notify on success
        if: success()
        uses: rtCamp/action-slack-notify@v2
        env:
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
          SLACK_CHANNEL: datastax-nvidia-ci
          SLACK_COLOR: good
          SLACK_TITLE: Test Matrix CI Succeeded
          SLACK_MESSAGE: "The DataStax NVIDIA Test Matrix CI pipeline has completed successfully."

      - name: Notify on failure
        if: failure()
        uses: rtCamp/action-slack-notify@v2
        env:
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
          SLACK_CHANNEL: datastax-nvidia-ci
          SLACK_COLOR: danger
          SLACK_TITLE: Test Matrix CI Failed
          SLACK_MESSAGE: "The DataStax NVIDIA Test Matrix CI pipeline has failed. Please check the logs for details."

      # Tear down whatever Terraform created, regardless of the job outcome.
      - name: Cleanup resources
        if: always()  # Run even if previous steps fail
        run: |
          terraform destroy -auto-approve