Skip to content

Commit 683131b

Browse files
committed
Merge remote-tracking branch 'upstream/main'
2 parents f464957 + 9861fae commit 683131b

File tree

2 files changed

+189
-25
lines changed

2 files changed

+189
-25
lines changed

tests/common/support/dataScienceCluster.go

Lines changed: 175 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,36 @@ package support
1919
import (
2020
"context"
2121
"fmt"
22+
"strings"
2223
"time"
2324

2425
"github.com/onsi/gomega"
2526

27+
apierrors "k8s.io/apimachinery/pkg/api/errors"
2628
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2729
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
2830
"k8s.io/apimachinery/pkg/runtime/schema"
2931
"k8s.io/client-go/dynamic"
3032
)
3133

34+
// Well-known string values used when reading or updating a
// DataScienceCluster (DSC) resource.

// PhaseReady is the status.phase value that WaitForDSCReady waits for.
const PhaseReady = "Ready"

// Status values carried by an entry of status.conditions.
const (
	ConditionTrue  = "True"
	ConditionFalse = "False"
)

// ReasonRemoved is the condition reason that WaitForComponentRemoved waits
// for on a component's readiness condition.
const ReasonRemoved = "Removed"

// Values written to spec.components.<component>.managementState.
const (
	StateManaged   = "Managed"
	StateRemoved   = "Removed"
	StateUnmanaged = "Unmanaged"
)
51+
3252
var DscGVR = schema.GroupVersionResource{
3353
Group: "datasciencecluster.opendatahub.io",
3454
Version: "v2",
@@ -43,50 +63,187 @@ func updateDSC(dynamicClient dynamic.Interface, ctx context.Context, dsc *unstru
4363
return dynamicClient.Resource(DscGVR).Update(ctx, dsc, metav1.UpdateOptions{})
4464
}
4565

46-
func setComponentState(dynamicClient dynamic.Interface, ctx context.Context, dscName, component, state string) error {
47-
dsc, err := getDSC(dynamicClient, ctx, dscName)
48-
if err != nil {
49-
return err
66+
func GetDSCPhase(dsc *unstructured.Unstructured) string {
67+
phase, found, err := unstructured.NestedString(dsc.Object, "status", "phase")
68+
if err != nil || !found {
69+
return ""
5070
}
71+
return phase
72+
}
5173

52-
err = unstructured.SetNestedField(dsc.Object, state, "spec", "components", component, "managementState")
53-
if err != nil {
54-
return err
74+
func WaitForDSCReady(dynamicClient dynamic.Interface, ctx context.Context, dscName string) error {
75+
var phase string
76+
77+
for {
78+
select {
79+
case <-ctx.Done():
80+
return fmt.Errorf("%s: timed out waiting for DSC to be Ready - current state: %s", dscName, phase)
81+
default:
82+
dsc, err := getDSC(dynamicClient, ctx, dscName)
83+
if err != nil {
84+
return err
85+
}
86+
phase = GetDSCPhase(dsc)
87+
if phase == PhaseReady {
88+
return nil
89+
}
90+
time.Sleep(5 * time.Second)
91+
}
92+
}
93+
}
94+
95+
// ConditionDetails is a flattened copy of one entry of a DSC's
// status.conditions list: the condition type that was looked up plus the
// status, reason and message strings found for it. Fields that were not
// present (or were not strings) are left empty.
type ConditionDetails struct {
	Type, Status, Reason, Message string
}
101+
102+
// componentReadyCondition builds the condition type used to look up a
// component's readiness in status.conditions: the component name with its
// first character upper-cased, followed by "Ready" (for example "trainer"
// becomes "TrainerReady"). An empty component name yields "".
func componentReadyCondition(component string) string {
	if component == "" {
		return ""
	}

	// Locate the byte offset at which the second rune starts, so that a
	// multi-byte first character is upper-cased as a whole rather than being
	// cut in the middle by a fixed one-byte slice.
	split := len(component)
	for i := range component {
		if i > 0 {
			split = i
			break
		}
	}
	return strings.ToUpper(component[:split]) + component[split:] + "Ready"
}
56108

57-
_, err = updateDSC(dynamicClient, ctx, dsc)
58-
return err
109+
func GetConditionDetails(dsc *unstructured.Unstructured, conditionType string) ConditionDetails {
110+
details := ConditionDetails{Type: conditionType}
111+
conditions, found, err := unstructured.NestedSlice(dsc.Object, "status", "conditions")
112+
if err != nil || !found {
113+
return details
114+
}
115+
for _, c := range conditions {
116+
condition, ok := c.(map[string]interface{})
117+
if !ok {
118+
continue
119+
}
120+
if condition["type"] == conditionType {
121+
if status, ok := condition["status"].(string); ok {
122+
details.Status = status
123+
}
124+
if reason, ok := condition["reason"].(string); ok {
125+
details.Reason = reason
126+
}
127+
if message, ok := condition["message"].(string); ok {
128+
details.Message = message
129+
}
130+
return details
131+
}
132+
}
133+
return details
59134
}
60135

61-
func WaitForComponentState(dynamicClient dynamic.Interface, ctx context.Context, dscName, component, expectedState string, timeout time.Duration) error {
62-
deadline := time.Now().Add(timeout)
63-
for time.Now().Before(deadline) {
136+
func setComponentState(dynamicClient dynamic.Interface, ctx context.Context, dscName, component, state string) error {
137+
// Retry up to 5 times to handle optimistic locking conflicts
138+
maxRetries := 5
139+
for i := 0; i < maxRetries; i++ {
64140
dsc, err := getDSC(dynamicClient, ctx, dscName)
65141
if err != nil {
66142
return err
67143
}
68-
currentState := ComponentStatusManagementState(dsc, component)
69-
if currentState == expectedState {
144+
145+
err = unstructured.SetNestedField(dsc.Object, state, "spec", "components", component, "managementState")
146+
if err != nil {
147+
return err
148+
}
149+
150+
_, err = updateDSC(dynamicClient, ctx, dsc)
151+
if err == nil {
70152
return nil
71153
}
72-
time.Sleep(2 * time.Second)
154+
155+
if apierrors.IsConflict(err) {
156+
time.Sleep(500 * time.Millisecond)
157+
continue
158+
}
159+
160+
return err
161+
}
162+
return fmt.Errorf("failed to set component %s state after %d retries due to conflicts", component, maxRetries)
163+
}
164+
165+
func WaitForComponentReady(dynamicClient dynamic.Interface, ctx context.Context, dscName, component string) error {
166+
conditionType := componentReadyCondition(component)
167+
var condition ConditionDetails
168+
169+
for {
170+
select {
171+
case <-ctx.Done():
172+
return fmt.Errorf("%s: timed out waiting for component to be ready - status: %s, reason: %s, message: %s",
173+
conditionType, condition.Status, condition.Reason, condition.Message)
174+
default:
175+
dsc, err := getDSC(dynamicClient, ctx, dscName)
176+
if err != nil {
177+
return err
178+
}
179+
condition = GetConditionDetails(dsc, conditionType)
180+
181+
if condition.Status == ConditionTrue {
182+
return nil
183+
}
184+
185+
time.Sleep(5 * time.Second)
186+
}
73187
}
74-
return fmt.Errorf("timeout waiting for component %s to reach state %s", component, expectedState)
75188
}
76189

190+
func WaitForComponentRemoved(dynamicClient dynamic.Interface, ctx context.Context, dscName, component string) error {
191+
conditionType := componentReadyCondition(component)
192+
var condition ConditionDetails
193+
194+
for {
195+
select {
196+
case <-ctx.Done():
197+
return fmt.Errorf("%s: timed out waiting for component to be removed - status: %s, reason: %s, message: %s",
198+
conditionType, condition.Status, condition.Reason, condition.Message)
199+
default:
200+
dsc, err := getDSC(dynamicClient, ctx, dscName)
201+
if err != nil {
202+
return err
203+
}
204+
condition = GetConditionDetails(dsc, conditionType)
205+
206+
if condition.Status == "False" && condition.Reason == ReasonRemoved {
207+
return nil
208+
}
209+
210+
time.Sleep(5 * time.Second)
211+
}
212+
}
213+
}
214+
215+
// SetComponentStateAndWait sets a component's managementState and waits for completion.
216+
// It creates a context with timeout that enforces the deadline across all operations.
77217
func SetComponentStateAndWait(dynamicClient dynamic.Interface, ctx context.Context, dscName, component, state string, timeout time.Duration) error {
218+
// Create a context with timeout - this will automatically enforce the deadline
219+
ctx, cancel := context.WithTimeout(ctx, timeout)
220+
defer cancel()
221+
78222
if err := setComponentState(dynamicClient, ctx, dscName, component, state); err != nil {
79223
return err
80224
}
81-
return WaitForComponentState(dynamicClient, ctx, dscName, component, state, timeout)
225+
226+
if state == StateManaged {
227+
if err := WaitForComponentReady(dynamicClient, ctx, dscName, component); err != nil {
228+
return err
229+
}
230+
}
231+
232+
if state == StateRemoved {
233+
if err := WaitForComponentRemoved(dynamicClient, ctx, dscName, component); err != nil {
234+
return err
235+
}
236+
}
237+
238+
return WaitForDSCReady(dynamicClient, ctx, dscName)
82239
}
83240

84241
// GetDSC fetches the named DataScienceCluster using the dynamic client and
// context provided by test.
func GetDSC(test Test, name string) (*unstructured.Unstructured, error) {
	client := test.Client().Dynamic()
	return getDSC(client, test.Ctx(), name)
}
87244

88245
func SetComponentToUnmanaged(test Test, dscName string, component string) error {
89-
return setComponentState(test.Client().Dynamic(), test.Ctx(), dscName, component, "Unmanaged")
246+
return setComponentState(test.Client().Dynamic(), test.Ctx(), dscName, component, StateUnmanaged)
90247
}
91248

92249
func DSCResource(test Test, name string) func(g gomega.Gomega) *unstructured.Unstructured {

tests/trainer/trainer_kueue_integration_test.go

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ var initialKueueState string
4646
var initialTrainerState string
4747

4848
func TestMain(m *testing.M) {
49+
var code int
50+
var setupFailed bool
51+
4952
// Capture initial Trainer state before running any tests
5053
initialTrainerState = captureComponentState("trainer")
5154
fmt.Printf("Initial Trainer managementState: %s\n", initialTrainerState)
@@ -57,15 +60,19 @@ func TestMain(m *testing.M) {
5760
// Setup Trainer to Managed if not already
5861
if initialTrainerState != "Managed" {
5962
if err := setupTrainer(); err != nil {
60-
fmt.Printf("Setup: Failed to set Trainer managementState to Managed in DataScienceCluster: %v\n", err)
61-
os.Exit(1)
63+
fmt.Printf("Setup failed: %v\n", err)
64+
fmt.Println("Skipping test execution due to setup failure ...")
65+
setupFailed = true
66+
code = 1
6267
}
6368
} else {
6469
fmt.Println("Setup: Skipping Trainer setup as it is already set to Managed in DataScienceCluster")
6570
}
6671

67-
// Run all tests
68-
code := m.Run()
72+
// Run all tests only if setup succeeded
73+
if !setupFailed {
74+
code = m.Run()
75+
}
6976

7077
// TearDown Trainer: Only set to Removed if it was not already Managed before tests
7178
if initialTrainerState != "Managed" {
@@ -79,7 +86,7 @@ func TestMain(m *testing.M) {
7986
// TearDown Kueue: Only set to Removed if it was not already Unmanaged before tests
8087
if initialKueueState != "Unmanaged" {
8188
if err := tearDownComponent("kueue"); err != nil {
82-
fmt.Printf("TearDown: Failed to set Kueue to Removed : %v\n", err)
89+
fmt.Printf("TearDown: Failed to set Kueue to Removed: %v\n", err)
8390
}
8491
} else {
8592
fmt.Println("TearDown: Skipping Kueue teardown as Initial Kueue managementState was Unmanaged in DataScienceCluster")
@@ -124,13 +131,13 @@ func captureComponentState(component string) string {
124131
func setupTrainer() error {
125132
dynamicClient, err := createDynamicClient()
126133
if err != nil {
127-
return fmt.Errorf("Setup: %w", err)
134+
return err
128135
}
129136

130137
fmt.Println("Setup: Setting trainer managementState to Managed in DataScienceCluster...")
131138
err = SetComponentStateAndWait(dynamicClient, context.Background(), defaultDSCName, "trainer", "Managed", 2*time.Minute)
132139
if err != nil {
133-
return fmt.Errorf("Setup: failed to set trainer to Managed: %w", err)
140+
return err
134141
}
135142

136143
fmt.Println("Setup: Trainer is set to Managed managementState successfully")

0 commit comments

Comments
 (0)