Skip to content

Commit eeac1e4

Browse files
committed
added github action to run the e2e tests in a kind cluster
1 parent f671c39 commit eeac1e4

File tree

7 files changed

+182
-14
lines changed

7 files changed

+182
-14
lines changed
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
name: KinD ODH E2E Tests
2+
3+
on:
4+
pull_request:
5+
branches:
6+
- main
7+
workflow_dispatch:
8+
9+
jobs:
10+
get-merge-commit:
11+
name: Get merge commit
12+
uses: ./.github/workflows/get-merge-commit.yaml
13+
14+
kind-odh-e2e-tests:
15+
runs-on: ubuntu-latest
16+
permissions:
17+
contents: read
18+
env:
19+
IMAGE_BUILDER: docker
20+
IMG: opendatahub-operator:pr-${{ github.event.number }}
21+
CLUSTER_NAME: kind-odh-${{ github.event.number }}-${{ github.run_id }}-${{ github.run_attempt }}
22+
OPERATOR_NAMESPACE: opendatahub-operator-system
23+
KIND_CONFIG_PATH: kind-config.yaml
24+
steps:
25+
- name: Checkout code
26+
uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1
27+
with:
28+
ref: ${{ needs.get-merge-commit.outputs.mergedSha }}
29+
30+
- name: Set up Go
31+
uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
32+
with:
33+
go-version-file: go.mod
34+
35+
- name: Create custom KinD config
36+
run: |
37+
cat > ${{ env.KIND_CONFIG_PATH }} <<EOF
38+
# six nodes (three control + three workers) cluster config
39+
kind: Cluster
40+
apiVersion: kind.x-k8s.io/v1alpha4
41+
nodes:
42+
- role: control-plane
43+
- role: control-plane
44+
- role: control-plane
45+
- role: worker
46+
- role: worker
47+
- role: worker
48+
EOF
49+
50+
- name: Create KinD Cluster
51+
uses: helm/kind-action@ef37e7f390d99f746eb8b610417061a60e82a6cc # v1.14.0
52+
with:
53+
cluster_name: ${{ env.CLUSTER_NAME }}
54+
config: ${{ env.KIND_CONFIG_PATH }}
55+
56+
- name: Build Operator Image
57+
run: |
58+
make image-build
59+
60+
- name: Load Operator Image into the KinD Cluster
61+
run: |
62+
kind load docker-image ${{ env.IMG }} --name ${{ env.CLUSTER_NAME }}
63+
64+
- name: Install CRDs
65+
run: |
66+
make install
67+
68+
- name: Deploy Operator
69+
run: |
70+
# The image is uploaded to the KinD cluster, so we need to use the local image instead of trying to pull it from a remote registry.
71+
sed -i 's|imagePullPolicy: Always|imagePullPolicy: IfNotPresent|g' config/manager/manager.yaml
72+
73+
make deploy
74+
75+
- name: Verify Operator Installation
76+
run: |
77+
echo "Waiting for operator deployment to be available..."
78+
79+
for i in $(seq 1 30); do
80+
echo "[$(date +%H:%M:%S)] Verifying state... ($i/30)"
81+
82+
READY=$(kubectl get deployment opendatahub-operator-controller-manager -n ${{ env.OPERATOR_NAMESPACE }} -o jsonpath='{.status.readyReplicas}')
83+
DESIRED=$(kubectl get deployment opendatahub-operator-controller-manager -n ${{ env.OPERATOR_NAMESPACE }} -o jsonpath='{.spec.replicas}')
84+
85+
echo "Ready replicas: ${READY:-0} / $DESIRED"
86+
87+
if [ "${READY}" == "$DESIRED" ]; then
88+
echo "✅ Deployment ready!"
89+
break
90+
fi
91+
92+
if [ $i -eq 30 ]; then
93+
echo "❌ Timeout."
94+
kubectl describe pods -n ${{ env.OPERATOR_NAMESPACE }}
95+
kubectl logs deployment/opendatahub-operator-controller-manager -n ${{ env.OPERATOR_NAMESPACE }} --all-containers --tail=100
96+
exit 1
97+
fi
98+
99+
sleep 10
100+
done
101+
kubectl get deployments -n ${{ env.OPERATOR_NAMESPACE }}
102+
# kubectl wait --for=condition=Available deployment/opendatahub-operator-controller-manager -n ${{ env.OPERATOR_NAMESPACE }} --timeout=600s
103+
echo "Listing pods in ${{ env.OPERATOR_NAMESPACE }}:"
104+
kubectl get pods -n ${{ env.OPERATOR_NAMESPACE }}
105+
echo "Checking installed CRDs:"
106+
kubectl get crds | grep opendatahub.io
107+
108+
- name: Run E2E Tests
109+
run: |
110+
make e2e-test -e E2E_TEST_CLEAN_UP_PREVIOUS_RESOURCES=false -e E2E_TEST_DEPENDANT_OPERATORS_MANAGEMENT=false -e E2E_TEST_WEBHOOK=false -e E2E_TEST_COMPONENT="kserve" -e E2E_TEST_SERVICES=false -e E2E_TEST_OPERATOR_RESILIENCE=false -e E2E_TEST_OPERATOR_V2TOV3UPGRADE=false -e E2E_TEST_HARDWARE_PROFILE=false

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -728,6 +728,8 @@ Env vars can be set to configure e2e tests:
728728
| E2E_TEST_WORKBENCHES_NAMESPACE | Namespace where the workbenches are deployed. | `opendatahub` |
729729
| E2E_TEST_DSC_MONITORING_NAMESPACE | Namespace where the ODH monitoring is deployed. | `opendatahub` |
730730
| E2E_TEST_OPERATOR_CONTROLLER | To configure the execution of tests related to the Operator POD, this is useful to run e2e tests for an operator running out of the cluster i.e. for debugging purposes | `true` |
731+
| E2E_TEST_DSC_MANAGEMENT | To configure the execution of DSCI/DSC management tests | `true` |
732+
| E2E_TEST_DEPENDANT_OPERATORS_MANAGEMENT | To configure the execution of dependant operators management tests | `true` |
731733
| E2E_TEST_OPERATOR_RESILIENCE | To configure the execution of operator resilience tests, useful for testing operator fault tolerance scenarios | `true` |
732734
| E2E_TEST_WEBHOOK | To configure the execution of tests related to the Operator WebHooks, this is useful to run e2e tests for an operator running out of the cluster i.e. for debugging purposes | `true` |
733735
| E2E_TEST_DELETION_POLICY | Specify when to delete `DataScienceCluster`, `DSCInitialization`, and controllers. Valid options are: `always`, `on-failure`, and `never`. | `always` |
@@ -751,6 +753,8 @@ Alternatively the above configurations can be passed to e2e-tests as flags by se
751753
| --workbenches-namespace | Namespace where the workbenches are deployed. | `opendatahub` |
752754
| --dsc-monitoring-namespace | Namespace where the ODH monitoring is deployed. | `opendatahub` |
753755
| --test-operator-controller | To configure the execution of tests related to the Operator POD, this is useful to run e2e tests for an operator running out of the cluster i.e. for debugging purposes | `true` |
756+
| --test-dsc-management | To configure the execution of DSCI/DSC management tests | `true` |
757+
| --test-dependant-operators-management | To configure the execution of dependant operators management tests | `true` |
754758
| --test-operator-resilience | To configure the execution of operator resilience tests, useful for testing operator fault tolerance scenarios | `true` |
755759
| --test-webhook | To configure the execution of tests related to the Operator WebHooks, this is useful to run e2e tests for an operator running out of the cluster i.e. for debugging purposes | `true` |
756760
| --deletion-policy | Specify when to delete `DataScienceCluster`, `DSCInitialization`, and controllers. Valid options are: `always`, `on-failure`, and `never`. | `always` |

pkg/utils/test/testf/testf_witht.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ func (t *WithT) List(
127127

128128
err := t.Client().List(ctx, &items, option...)
129129
if err != nil {
130-
return nil, StopErr(err, "failed to list resource: %s", gvk)
130+
return nil, err
131131
}
132132

133133
return items.Items, nil
@@ -163,7 +163,9 @@ func (t *WithT) Get(
163163
case k8serr.IsNotFound(err):
164164
return nil, nil
165165
case err != nil:
166-
return nil, StopErr(err, "failed to get resource: %s, nn: %s", gvk, nn.String())
166+
// Return the error as-is instead of wrapping it with StopErr: errors such as NoMatchError may be temporary or expected in some environments (e.g. KinD).
167+
// The caller should decide how to handle it.
168+
return nil, err
167169
default:
168170
return &u, nil
169171
}

tests/e2e/controller_test.go

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -81,14 +81,16 @@ type TestContextConfig struct {
8181
monitoringNamespace string
8282
deletionPolicy DeletionPolicy
8383

84-
failFastWhenError bool
85-
cleanUpPreviousResources bool
86-
operatorControllerTest bool
87-
operatorResilienceTest bool
88-
webhookTest bool
89-
v2tov3upgradeTest bool
90-
hardwareProfileTest bool
91-
TestTimeouts TestTimeouts
84+
failFastWhenError bool
85+
cleanUpPreviousResources bool
86+
dependantOperatorsManagementTest bool
87+
dscManagementTest bool
88+
operatorControllerTest bool
89+
operatorResilienceTest bool
90+
webhookTest bool
91+
v2tov3upgradeTest bool
92+
hardwareProfileTest bool
93+
TestTimeouts TestTimeouts
9294
}
9395

9496
// TestGroup defines the test groups.
@@ -301,6 +303,13 @@ func TestOdhOperator(t *testing.T) {
301303
// Remove any leftover resources from previous test runs before starting if the cleanup flag is enabled
302304
if testOpts.cleanUpPreviousResources {
303305
CleanupPreviousTestResources(t)
306+
}
307+
308+
if testOpts.dependantOperatorsManagementTest {
309+
mustRun(t, "Dependant Operators Management E2E Tests", dependantOperatorsManagementTestSuite)
310+
}
311+
312+
if testOpts.dscManagementTest {
304313
// Run DSCI/DSC management test suite
305314
mustRun(t, "DSCInitialization and DataScienceCluster management E2E Tests", dscManagementTestSuite)
306315
}
@@ -417,6 +426,10 @@ func TestMain(m *testing.M) {
417426
checkEnvVarBindingError(viper.BindEnv("clean-up-previous-resources", viper.GetEnvPrefix()+"_CLEAN_UP_PREVIOUS_RESOURCES"))
418427
pflag.Bool("test-operator-controller", true, "run operator controller tests")
419428
checkEnvVarBindingError(viper.BindEnv("test-operator-controller", viper.GetEnvPrefix()+"_OPERATOR_CONTROLLER"))
429+
pflag.Bool("test-dependant-operators-management", true, "run dependant operators management tests")
430+
checkEnvVarBindingError(viper.BindEnv("test-dependant-operators-management", viper.GetEnvPrefix()+"_DEPENDANT_OPERATORS_MANAGEMENT"))
431+
pflag.Bool("test-dsc-management", true, "run DSCI/DSC management tests")
432+
checkEnvVarBindingError(viper.BindEnv("test-dsc-management", viper.GetEnvPrefix()+"_DSC_MANAGEMENT"))
420433
pflag.Bool("test-operator-resilience", true, "run operator resilience tests")
421434
checkEnvVarBindingError(viper.BindEnv("test-operator-resilience", viper.GetEnvPrefix()+"_OPERATOR_RESILIENCE"))
422435
pflag.Bool("test-operator-v2tov3upgrade", true, "run V2 to V3 upgrade tests")
@@ -480,6 +493,8 @@ func TestMain(m *testing.M) {
480493
testOpts.failFastWhenError = viper.GetBool("fail-fast-on-error")
481494
testOpts.cleanUpPreviousResources = viper.GetBool("clean-up-previous-resources")
482495
testOpts.operatorControllerTest = viper.GetBool("test-operator-controller")
496+
testOpts.dependantOperatorsManagementTest = viper.GetBool("test-dependant-operators-management")
497+
testOpts.dscManagementTest = viper.GetBool("test-dsc-management")
483498
testOpts.operatorResilienceTest = viper.GetBool("test-operator-resilience")
484499
testOpts.v2tov3upgradeTest = viper.GetBool("test-operator-v2tov3upgrade")
485500
testOpts.hardwareProfileTest = viper.GetBool("test-hardware-profile")

tests/e2e/creation_test.go

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ type DSCTestCtx struct {
3131
*TestContext
3232
}
3333

34-
// dscManagementTestSuite runs the DataScienceCluster and DSCInitialization management test suite.
35-
func dscManagementTestSuite(t *testing.T) {
34+
// dependantOperatorsManagementTestSuite runs the dependant operators management test suite.
35+
func dependantOperatorsManagementTestSuite(t *testing.T) {
3636
t.Helper()
3737

3838
// disruptive tests are only supported on tier3 clusters
@@ -51,6 +51,30 @@ func dscManagementTestSuite(t *testing.T) {
5151
testCases := []TestCase{
5252
{"Ensure required operators are installed", dscTestCtx.ValidateOperatorsInstallation},
5353
{"Ensure required resources are created", dscTestCtx.ValidateResourcesCreation},
54+
}
55+
56+
// Run the test suite.
57+
RunTestCases(t, testCases)
58+
}
59+
60+
// dscManagementTestSuite runs the DataScienceCluster and DSCInitialization management test suite.
61+
func dscManagementTestSuite(t *testing.T) {
62+
t.Helper()
63+
64+
// disruptive tests are only supported on tier3 clusters
65+
skipUnless(t, Tier3)
66+
67+
// Initialize the test context.
68+
tc, err := NewTestContext(t)
69+
require.NoError(t, err, "Failed to initialize test context")
70+
71+
// Create an instance of test context.
72+
dscTestCtx := DSCTestCtx{
73+
TestContext: tc,
74+
}
75+
76+
// Define test cases.
77+
testCases := []TestCase{
5478
{"Validate creation of DSCInitialization instance", dscTestCtx.ValidateDSCICreation},
5579
{"Validate creation of DataScienceCluster instance", dscTestCtx.ValidateDSCCreation},
5680
}

tests/e2e/scripts/run_e2e_tests.sh

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,12 @@ validate_bool E2E_TEST_HARDWARE_PROFILE
6161
: "${E2E_TEST_WEBHOOK:=true}"
6262
validate_bool E2E_TEST_WEBHOOK
6363

64+
: "${E2E_TEST_DSC_MANAGEMENT:=false}"
65+
validate_bool E2E_TEST_DSC_MANAGEMENT
66+
67+
: "${E2E_TEST_DEPENDANT_OPERATORS_MANAGEMENT:=false}"
68+
validate_bool E2E_TEST_DEPENDANT_OPERATORS_MANAGEMENT
69+
6470
: "${E2E_TEST_COMPONENTS:=true}"
6571
validate_bool E2E_TEST_COMPONENTS
6672

@@ -92,7 +98,9 @@ exec gotestsum --junitfile-project-name odh-operator-e2e \
9298
--deletion-policy="$E2E_TEST_DELETION_POLICY" --clean-up-previous-resources="$E2E_TEST_CLEAN_UP_PREVIOUS_RESOURCES" \
9399
--test-operator-controller="$E2E_TEST_OPERATOR_CONTROLLER" --test-operator-resilience="$E2E_TEST_OPERATOR_RESILIENCE" \
94100
--test-operator-v2tov3upgrade="$E2E_TEST_OPERATOR_V2TOV3UPGRADE" --test-hardware-profile="$E2E_TEST_HARDWARE_PROFILE" \
95-
--test-webhook="$E2E_TEST_WEBHOOK" --test-components="$E2E_TEST_COMPONENTS" --test-services="$E2E_TEST_SERVICES" \
101+
--test-webhook="$E2E_TEST_WEBHOOK" --test-dsc-management="$E2E_TEST_DSC_MANAGEMENT" \
102+
--test-dependant-operators-management="$E2E_TEST_DEPENDANT_OPERATORS_MANAGEMENT" \
103+
--test-components="$E2E_TEST_COMPONENTS" --test-services="$E2E_TEST_SERVICES" \
96104
--operator-namespace="$E2E_TEST_OPERATOR_NAMESPACE" --applications-namespace="$E2E_TEST_APPLICATIONS_NAMESPACE" \
97105
--workbenches-namespace="$E2E_TEST_WORKBENCHES_NAMESPACE" --dsc-monitoring-namespace="$E2E_TEST_DSC_MONITORING_NAMESPACE" \
98106
--fail-fast-on-error="$E2E_TEST_FAIL_FAST_ON_ERROR" --tag="$E2E_TEST_TAG" "$@"

tests/e2e/test_context_test.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -618,6 +618,9 @@ func (tc *TestContext) FetchActualSubscription(nn types.NamespacedName) (*ofapi.
618618
WithCustomErrorMsg("Failed to fetch Subscription %s/%s", nn.Namespace, nn.Name),
619619
))
620620
if err != nil {
621+
if meta.IsNoMatchError(err) {
622+
return nil, nil
623+
}
621624
return nil, err
622625
}
623626
if subU == nil {
@@ -1305,6 +1308,9 @@ func (tc *TestContext) FetchActualClusterServiceVersion(nn types.NamespacedName)
13051308
WithCustomErrorMsg("Failed to fetch CSV %s/%s", nn.Namespace, nn.Name),
13061309
))
13071310
if err != nil {
1311+
if meta.IsNoMatchError(err) {
1312+
return nil, nil
1313+
}
13081314
return nil, err
13091315
}
13101316
if csvU == nil {
@@ -1588,7 +1594,6 @@ func (tc *TestContext) UninstallOperator(operatorNamespacedName types.Namespaced
15881594
return
15891595
}
15901596
if sub == nil {
1591-
// Subscription doesn't exist, nothing to uninstall
15921597
return
15931598
}
15941599

0 commit comments

Comments
 (0)