# Skip to content

# Initial commit - cut over from private repository #1

# Initial commit - cut over from private repository

# Initial commit - cut over from private repository #1

# Workflow file for this run

# CI pipeline: provisions infrastructure with Terraform, runs the post-setup
# and diagnostic scripts against it, tears the infrastructure down again, and
# reports the overall result to Slack.
name: DataStax NVIDIA Test Matrix CI

on:
  push:
    branches: [main]
  workflow_dispatch: # Allow manual triggers

# Every run applies and destroys infrastructure for the same fixed cluster
# name (see CLUSTER_NAME below), so overlapping runs would step on each other.
# Queue them instead; never cancel a run mid-apply/destroy.
concurrency:
  group: datastax-nvidia-test-matrix
  cancel-in-progress: false

jobs:
  terraform-apply-and-test:
    runs-on: ubuntu-latest
    environment: production
    permissions:
      id-token: write # Required for AWS OIDC authentication
      contents: read
    env:
      # The `secrets` context is not available in step-level `if:` conditions,
      # so expose a non-secret 'true'/'false' flag that steps can test instead.
      RUNAI_ENABLED: ${{ secrets.RUNAI_TOKEN != '' }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
          aws-region: us-west-2

      - name: Setup Terraform
        uses: hashicorp/setup-terraform@v3
        with:
          terraform_version: 1.9.0

      # -input=false makes Terraform fail fast instead of waiting on a prompt
      # that nobody can answer in CI.
      - name: Terraform Init
        run: terraform init -input=false

      - name: Terraform Plan
        run: terraform plan -input=false -out=tfplan

      - name: Terraform Apply
        run: terraform apply -input=false -auto-approve tfplan

      # NOTE(review): kubectl, Helm and yq all track "latest", so runs are not
      # reproducible across upstream releases - consider pinning versions.
      - name: Install kubectl
        uses: azure/setup-kubectl@v4
        with:
          version: 'latest'

      - name: Install Helm
        uses: azure/setup-helm@v4
        with:
          version: 'latest'

      - name: Install yq
        run: |
          # /usr/local/bin is a system directory; use sudo so the install does
          # not depend on the runner user's write permissions.
          sudo wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/local/bin/yq
          sudo chmod +x /usr/local/bin/yq

      # Add the RunAI key if RunAI is being used.
      # This must run before the post-setup script, which is handed the same
      # path through RUNAI_KEY_PATH (presumably to read the key - confirm
      # against post-tf-setup.sh).
      - name: Setup RunAI Key
        if: env.RUNAI_ENABLED == 'true'
        env:
          # Pass the secret through the environment rather than interpolating
          # it into the script text, so its content can never be parsed as
          # shell syntax.
          RUNAI_KEY_CONTENT: ${{ secrets.RUNAI_KEY_CONTENT }}
          RUNAI_KEY_PATH: "/tmp/runai.key"
        run: |
          # Create the file owner-only from the start, not just after chmod.
          umask 077
          printf '%s\n' "$RUNAI_KEY_CONTENT" > "$RUNAI_KEY_PATH"
          chmod 600 "$RUNAI_KEY_PATH"

      - name: Run post-setup script
        run: |
          chmod +x ./post-tf-setup.sh
          ./post-tf-setup.sh
        env:
          AWS_REGION: us-west-2
          CLUSTER_NAME: dbost
          GPU_OPERATOR_VERSION: v25.3.0
          RUNAI_TOKEN: ${{ secrets.RUNAI_TOKEN }}
          RUNAI_DOMAIN: ${{ secrets.RUNAI_DOMAIN }}
          RUNAI_KEY_PATH: "/tmp/runai.key"
          RUNAI_CLIENT_SECRET: ${{ secrets.RUNAI_CLIENT_SECRET }}
          RUNAI_UID: ${{ secrets.RUNAI_UID }}
          NGC_KEY: ${{ secrets.NGC_KEY }}

      - name: Run diagnostics
        run: |
          chmod +x ./diagnose-cluster.sh
          ./diagnose-cluster.sh

      # Upload whatever diagnostics exist even when an earlier step failed;
      # that is when they are most useful. Skipped only on cancellation.
      - name: Archive diagnostic results
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: cluster-diagnostics
          path: ./cluster-diagnostics/

      - name: Run application tests
        run: |
          # Add your application-specific tests here
          # Example: Deploy and test NVIDIA NeMo or DataStax SIA workloads
          echo "Running application tests..."

      # Tear down before notifying so that a failed destroy (leaked
      # infrastructure) is reported as a failure rather than going unnoticed.
      - name: Cleanup resources
        if: always() # Run even if previous steps fail
        run: |
          terraform destroy -input=false -auto-approve

      - name: Notify on success
        if: success()
        uses: rtCamp/action-slack-notify@v2
        env:
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
          SLACK_CHANNEL: datastax-nvidia-ci
          SLACK_COLOR: good
          SLACK_TITLE: Test Matrix CI Succeeded
          SLACK_MESSAGE: "The DataStax NVIDIA Test Matrix CI pipeline has completed successfully."

      - name: Notify on failure
        if: failure()
        uses: rtCamp/action-slack-notify@v2
        env:
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
          SLACK_CHANNEL: datastax-nvidia-ci
          SLACK_COLOR: danger
          SLACK_TITLE: Test Matrix CI Failed
          SLACK_MESSAGE: "The DataStax NVIDIA Test Matrix CI pipeline has failed. Please check the logs for details."