# Workflow file for run #7: "refactor: centralize CI variables and add component image overrides"

# Copyright NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: E2E Tests

# Three entry points: branch pushes, calls from other workflows, and manual
# dispatch. All of them expose the same five image/version inputs except
# `push`, which carries no inputs.
on:
  push:
    branches:
      - 'pull-request/[0-9]+'
      - main
      - 'release-*'

  # Reusable-workflow entry point: the caller must provide the operator image
  # and version; the component image inputs are optional.
  workflow_call:
    inputs:
      operator_image:
        type: string
        required: true
      operator_version:
        type: string
        required: true
      toolkit_image:
        description: 'Full container-toolkit image path (e.g., ghcr.io/nvidia/container-toolkit:v1.18.0)'
        type: string
        required: false
      device_plugin_image:
        description: 'Full device-plugin image path'
        type: string
        required: false
      mig_manager_image:
        description: 'Full mig-manager image path'
        type: string
        required: false
    secrets:
      # AWS credentials and SSH key are mandatory for callers.
      AWS_ACCESS_KEY_ID:
        required: true
      AWS_SECRET_ACCESS_KEY:
        required: true
      AWS_SSH_KEY:
        required: true
      # Slack secrets are optional for callers.
      SLACK_BOT_TOKEN:
        required: false
      SLACK_CHANNEL_ID:
        required: false

  # Manual runs: every input is an optional override.
  workflow_dispatch:
    inputs:
      operator_image:
        description: 'Operator image to test (override)'
        type: string
        required: false
      operator_version:
        description: 'Operator version to test (override)'
        type: string
        required: false
      toolkit_image:
        description: 'Override container-toolkit image'
        type: string
        required: false
      device_plugin_image:
        description: 'Override device-plugin image'
        type: string
        required: false
      mig_manager_image:
        description: 'Override mig-manager image'
        type: string
        required: false
jobs:
  # Calls the shared variables workflow, forwarding this workflow's inputs.
  # The test jobs below read the resulting values from
  # `needs.variables.outputs.*`.
  variables:
    uses: ./.github/workflows/variables.yaml
    with:
      operator_image: ${{ inputs.operator_image }}
      operator_version: ${{ inputs.operator_version }}
      toolkit_image: ${{ inputs.toolkit_image }}
      device_plugin_image: ${{ inputs.device_plugin_image }}
      mig_manager_image: ${{ inputs.mig_manager_image }}

  # Runs ./tests/cases/defaults.sh against a Holodeck-provisioned instance.
  # Differs from e2e-tests-nvidiadriver only in TEST_CASE and artifact name.
  e2e-tests-containerd:
    needs: [variables]
    runs-on: linux-amd64-cpu4
    permissions:
      contents: read
      id-token: write
    steps:
      - name: Check out code
        uses: actions/checkout@v5

      - name: Set up Holodeck
        # NOTE(review): the ref after "NVIDIA/" looks mangled by e-mail
        # obfuscation ("[email protected]"); it was presumably
        # `NVIDIA/holodeck@<version>` - restore the intended pinned version.
        uses: NVIDIA/[email protected]
        with:
          aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws_ssh_key: ${{ secrets.AWS_SSH_KEY }}
          holodeck_config: "tests/holodeck.yaml"

      - name: Get public dns name
        id: get_public_dns_name
        # NOTE(review): `@master` is a mutable ref; consider pinning to a
        # release tag or commit SHA.
        uses: mikefarah/yq@master
        with:
          cmd: yq '.status.properties[] | select(.name == "public-dns-name") | .value' /github/workspace/.cache/holodeck.yaml

      - name: Set test environment
        # Values appended to GITHUB_ENV become environment variables in all
        # later steps of this job; `private_key` is used below, and
        # `instance_hostname` is presumably read by the test scripts.
        run: |
          echo "instance_hostname=ubuntu@${{ steps.get_public_dns_name.outputs.result }}" >> "${GITHUB_ENV}"
          echo "private_key=${{ github.workspace }}/key.pem" >> "${GITHUB_ENV}"

      - name: Write SSH private key
        env:
          # Passed through the environment rather than expanded into the
          # script text, so characters in the secret cannot alter the script.
          AWS_SSH_KEY: ${{ secrets.AWS_SSH_KEY }}
        run: |
          printf '%s\n' "${AWS_SSH_KEY}" > "${private_key}"
          chmod 400 "${private_key}"

      - name: Run e2e tests
        env:
          OPERATOR_VERSION: ${{ needs.variables.outputs.operator_version }}
          OPERATOR_IMAGE: ${{ needs.variables.outputs.operator_image }}
          # The component images are not passed as arguments below; they are
          # presumably picked up from the environment by the test scripts.
          TOOLKIT_CONTAINER_IMAGE: ${{ needs.variables.outputs.toolkit_image }}
          DEVICE_PLUGIN_IMAGE: ${{ needs.variables.outputs.device_plugin_image }}
          MIG_MANAGER_IMAGE: ${{ needs.variables.outputs.mig_manager_image }}
          GPU_PRODUCT_NAME: "Tesla-T4"
          SKIP_LAUNCH: "true"
          CONTAINER_RUNTIME: "containerd"
          TEST_CASE: "./tests/cases/defaults.sh"
        run: |
          # Remember the test result, pull the logs regardless of it, then
          # report the test result as the step's exit status.
          rc=0
          ./tests/ci-run-e2e.sh "${OPERATOR_IMAGE}" "${OPERATOR_VERSION}" "${GPU_PRODUCT_NAME}" "${TEST_CASE}" || rc=$?
          ./tests/scripts/pull.sh /tmp/logs logs
          exit "${rc}"

      - name: Archive test logs
        # Logs are uploaded only when an earlier step failed.
        if: ${{ failure() }}
        uses: actions/upload-artifact@v5
        with:
          name: containerd-e2e-test-logs
          path: ./logs/
          retention-days: 15

  # Runs ./tests/cases/nvidia-driver.sh against a Holodeck-provisioned
  # instance. Differs from e2e-tests-containerd only in TEST_CASE and
  # artifact name.
  e2e-tests-nvidiadriver:
    needs: [variables]
    runs-on: linux-amd64-cpu4
    permissions:
      contents: read
      id-token: write
    steps:
      - name: Check out code
        uses: actions/checkout@v5

      - name: Set up Holodeck
        # NOTE(review): the ref after "NVIDIA/" looks mangled by e-mail
        # obfuscation ("[email protected]"); it was presumably
        # `NVIDIA/holodeck@<version>` - restore the intended pinned version.
        uses: NVIDIA/[email protected]
        with:
          aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws_ssh_key: ${{ secrets.AWS_SSH_KEY }}
          holodeck_config: "tests/holodeck.yaml"

      - name: Get public dns name
        id: get_public_dns_name
        # NOTE(review): `@master` is a mutable ref; consider pinning to a
        # release tag or commit SHA.
        uses: mikefarah/yq@master
        with:
          cmd: yq '.status.properties[] | select(.name == "public-dns-name") | .value' /github/workspace/.cache/holodeck.yaml

      - name: Set test environment
        # Values appended to GITHUB_ENV become environment variables in all
        # later steps of this job; `private_key` is used below, and
        # `instance_hostname` is presumably read by the test scripts.
        run: |
          echo "instance_hostname=ubuntu@${{ steps.get_public_dns_name.outputs.result }}" >> "${GITHUB_ENV}"
          echo "private_key=${{ github.workspace }}/key.pem" >> "${GITHUB_ENV}"

      - name: Write SSH private key
        env:
          # Passed through the environment rather than expanded into the
          # script text, so characters in the secret cannot alter the script.
          AWS_SSH_KEY: ${{ secrets.AWS_SSH_KEY }}
        run: |
          printf '%s\n' "${AWS_SSH_KEY}" > "${private_key}"
          chmod 400 "${private_key}"

      - name: Run e2e tests
        env:
          OPERATOR_VERSION: ${{ needs.variables.outputs.operator_version }}
          OPERATOR_IMAGE: ${{ needs.variables.outputs.operator_image }}
          # The component images are not passed as arguments below; they are
          # presumably picked up from the environment by the test scripts.
          TOOLKIT_CONTAINER_IMAGE: ${{ needs.variables.outputs.toolkit_image }}
          DEVICE_PLUGIN_IMAGE: ${{ needs.variables.outputs.device_plugin_image }}
          MIG_MANAGER_IMAGE: ${{ needs.variables.outputs.mig_manager_image }}
          GPU_PRODUCT_NAME: "Tesla-T4"
          SKIP_LAUNCH: "true"
          CONTAINER_RUNTIME: "containerd"
          TEST_CASE: "./tests/cases/nvidia-driver.sh"
        run: |
          # Remember the test result, pull the logs regardless of it, then
          # report the test result as the step's exit status.
          rc=0
          ./tests/ci-run-e2e.sh "${OPERATOR_IMAGE}" "${OPERATOR_VERSION}" "${GPU_PRODUCT_NAME}" "${TEST_CASE}" || rc=$?
          ./tests/scripts/pull.sh /tmp/logs logs
          exit "${rc}"

      - name: Archive test logs
        # Logs are uploaded only when an earlier step failed.
        if: ${{ failure() }}
        uses: actions/upload-artifact@v5
        with:
          name: nvidiadriver-e2e-test-logs
          path: ./logs/
          retention-days: 15