Skip to content

Commit 0420b40

Browse files
authored
Merge pull request #2 from project-codeflare/RHOAIENG-26660
RHOAIENG-26660: upgrade to nightly
2 parents 2acede4 + 6c04b12 commit 0420b40

File tree

5 files changed

+121
-37
lines changed

5 files changed

+121
-37
lines changed
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Composite action: tears down every Kind cluster on the runner.
name: 'Cleanup Kind Cluster'
description: 'Deletes all Kind clusters for cleanup'

runs:
  using: 'composite'
  steps:
    - name: Cleanup
      shell: bash
      # `|| true` keeps cleanup best-effort: a failed delete must not fail the job.
      run: |
        kind delete clusters --all || true
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Composite action: prints runner resources and Kubernetes/Kind cluster state.
# Intended to be invoked from a workflow step after a test failure.
name: 'Debug Kubernetes Cluster'
description: 'Comprehensive debugging information for Kubernetes cluster and system resources'

runs:
  using: 'composite'
  steps:
    - name: Debug Kubernetes Cluster on Failure
      run: |
        # `shell: bash` runs with `-e -o pipefail`, so one failing command would
        # abort the rest of the dump. Diagnostics are best-effort: keep going so
        # that a broken cluster still yields as much output as possible.
        set +e

        echo "--- DEBUG OUTPUT ---"
        echo "Checking GitHub Runner resources..."
        lscpu
        free -h
        echo "Checking Kubernetes cluster state..."
        kubectl cluster-info
        kubectl get nodes

        echo "Describing all nodes..."
        kubectl describe nodes
        echo "Printing all Kubernetes events..."
        kubectl get events -A
        echo "Getting logs of Kind control plane container..."
        docker logs kind-control-plane
        echo "--- END OF DEBUG OUTPUT ---"
      shell: bash

    - name: Display all pods on kind cluster
      run: kubectl get pods -A
      shell: bash
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Composite action: installs the KubeRay operator Helm chart at a given version
# and blocks until its Deployment reports Available.
# Uses the `kuberay` Helm repo alias; it must already be added before this runs.
name: 'Install KubeRay Operator'
description: 'Installs KubeRay Operator with specified version and waits for it to be available'

inputs:
  version:
    description: 'Version of KubeRay Operator to install'
    required: true
  timeout:
    description: 'Timeout for waiting for operator to be available (default: 90s)'
    required: false
    default: '90s'

runs:
  using: 'composite'
  steps:
    - name: Install KubeRay Operator
      # Pass the input through the environment instead of splicing it into the
      # script text, so its value cannot be interpreted as shell syntax.
      env:
        KUBERAY_VERSION: ${{ inputs.version }}
      run: |
        helm install kuberay-operator kuberay/kuberay-operator --version "$KUBERAY_VERSION"
      shell: bash

    - name: Wait for KubeRay Operator to be available
      env:
        KUBERAY_WAIT_TIMEOUT: ${{ inputs.timeout }}
      run: kubectl wait --timeout="$KUBERAY_WAIT_TIMEOUT" --for=condition=Available=true deployment -n default kuberay-operator
      shell: bash
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Composite action: creates a Kind cluster, queries it through the `kind-kind`
# kubectl context, and registers the KubeRay Helm chart repository.
name: 'Setup Kind Cluster with KubeRay Helm Repo'
description: 'Creates a Kind cluster, sets up kubectl context, and adds KubeRay helm repository'

runs:
  using: 'composite'
  steps:
    - name: Create Kind Cluster
      shell: bash
      run: |
        kind create cluster

    - name: Set kubectl context
      shell: bash
      # Runs `cluster-info` against the `kind-kind` context.
      run: |
        kubectl cluster-info --context kind-kind

    - name: Add KubeRay Helm Repository
      shell: bash
      # Register the chart repo under the `kuberay` alias, then refresh the index.
      run: |
        helm repo add kuberay https://ray-project.github.io/kuberay-helm/
        helm repo update

.github/workflows/kuberay-upgrade.yml

Lines changed: 42 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -31,57 +31,62 @@ jobs:
3131
curl -Lo helm.tar.gz https://get.helm.sh/helm-v3.12.2-linux-amd64.tar.gz
3232
tar -zxvf helm.tar.gz
3333
mv linux-amd64/helm /usr/local/bin/helm
34-
helm repo add kuberay https://ray-project.github.io/kuberay-helm/
35-
helm repo update
3634
3735
- name: Install kind
3836
run: go install sigs.k8s.io/kind@latest
3937

40-
- name: Create Kind Cluster
41-
run: kind create cluster
42-
43-
- name: Set kubectl context
44-
run: kubectl cluster-info --context kind-kind
38+
- name: Setup first Kind Cluster with KubeRay Helm Repo
39+
uses: ./.github/actions/setup-kind-cluster
4540

4641
- name: Install Older KubeRay Operator (v1.3.2)
47-
run: |
48-
helm repo add kuberay https://ray-project.github.io/kuberay-helm/
49-
helm install kuberay-operator kuberay/kuberay-operator --version 1.3.2
50-
51-
- name: Wait for Kuberay Operator to be available
52-
run: kubectl wait --timeout=90s --for=condition=Available=true deployment -n default kuberay-operator
42+
uses: ./.github/actions/install-kuberay-operator
43+
with:
44+
version: '1.3.2'
5345

54-
- name: Prepare and Run Upgrade Tests
46+
- name: Prepare and Run Upgrade Tests to 1.4.0
47+
id: upgrade-to-latest
5548
run: |
5649
cd kuberay/ray-operator
5750
echo "--- START:Running e2e Operator upgrade (v1.3.2 to v1.4.0 operator) tests"
5851
KUBERAY_TEST_TIMEOUT_SHORT=1m KUBERAY_TEST_TIMEOUT_MEDIUM=5m KUBERAY_TEST_TIMEOUT_LONG=10m KUBERAY_TEST_UPGRADE_IMAGE=v1.4.0 \
5952
go test -timeout 30m -v ./test/e2eupgrade
6053
echo "--- END:e2e Operator upgrade (v1.3.2 to v1.4.0 operator) tests finished"
6154
62-
- name: Debug Kubernetes Cluster on Failure
63-
if: failure()
55+
- name: Debug Kubernetes Cluster on First Test Failure
56+
if: failure() && steps.upgrade-to-latest.outcome == 'failure'
57+
uses: ./.github/actions/debug-kubernetes-cluster
58+
59+
- name: Cleanup first Kind Cluster
60+
if: always()
61+
uses: ./.github/actions/cleanup-kind-cluster
62+
63+
- name: Setup second Kind Cluster with KubeRay Helm Repo
64+
uses: ./.github/actions/setup-kind-cluster
65+
66+
- name: Install KubeRay Operator (v1.4.0) for nightly upgrade
67+
uses: ./.github/actions/install-kuberay-operator
68+
with:
69+
version: '1.4.0'
70+
71+
- name: Prepare and Run Upgrade Tests to nightly
72+
id: upgrade-to-nightly
6473
run: |
65-
echo "--- DEBUG OUTPUT ---"
66-
echo "Checking GitHub Runner resources..."
67-
lscpu
68-
free -h
69-
echo "Checking Kubernetes cluster state..."
70-
kubectl cluster-info
71-
kubectl get nodes
72-
73-
echo "Describing all nodes..."
74-
kubectl describe nodes
75-
echo "Printing all Kubernetes events..."
76-
kubectl get events -A
77-
echo "Getting logs of Kind control plane container..."
78-
docker logs kind-control-plane
79-
echo "--- END OF DEBUG OUTPUT ---"
80-
- name: Display all pods on kind cluster
81-
if: failure()
82-
run: kubectl get pods -A
83-
84-
- name: Cleanup
74+
cd kuberay/ray-operator
75+
export KUBERAY_TEST_UPGRADE_IMAGE="nightly"
76+
export IMG="kuberay/kuberay-operator:$KUBERAY_TEST_UPGRADE_IMAGE"
77+
make docker-image
78+
kind load docker-image "$IMG"
79+
80+
echo "--- START:Running e2e Operator upgrade (v1.4.0 to nightly operator) tests"
81+
USE_LOCAL_HELM_CHART=true KUBERAY_TEST_TIMEOUT_SHORT=1m KUBERAY_TEST_TIMEOUT_MEDIUM=5m KUBERAY_TEST_TIMEOUT_LONG=10m KUBERAY_TEST_UPGRADE_IMAGE="$KUBERAY_TEST_UPGRADE_IMAGE" \
82+
go test -timeout 30m -v ./test/e2eupgrade
83+
echo "--- END:e2e Operator upgrade (v1.4.0 to nightly operator) tests finished"
84+
85+
- name: Debug Kubernetes Cluster on Second Test Failure
86+
if: failure() && steps.upgrade-to-nightly.outcome == 'failure'
87+
uses: ./.github/actions/debug-kubernetes-cluster
88+
89+
- name: Cleanup second Kind Cluster
8590
if: always()
86-
run: kind delete clusters --all || true
91+
uses: ./.github/actions/cleanup-kind-cluster
8792

0 commit comments

Comments
 (0)