Skip to content

Commit 6b4ee07

Browse files
auto-detect image registry from operator pod for RHOAI and ODH builds
1 parent 883088a commit 6b4ee07

5 files changed

Lines changed: 83 additions & 44 deletions

File tree

tests/common/support/config.go

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ limitations under the License.
1717
package support
1818

1919
import (
20+
"strings"
21+
2022
"github.com/onsi/gomega"
2123

2224
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -55,6 +57,38 @@ func GetOpenShiftApiUrl(test Test) string {
5557
return openShiftApiUrl
5658
}
5759

60+
// GetExpectedImagePrefix returns the expected container image prefix (registry + org)
61+
// by inspecting the rhods-operator deployment in the redhat-ods-operator namespace.
62+
// Returns "registry.redhat.io/rhoai" for RHOAI builds or "quay.io/opendatahub" for ODH builds.
63+
func GetExpectedImagePrefix(test Test) string {
64+
test.T().Helper()
65+
66+
pods := GetPods(test, "redhat-ods-operator", metav1.ListOptions{
67+
FieldSelector: "status.phase=Running",
68+
})
69+
70+
for _, pod := range pods {
71+
if strings.HasPrefix(pod.Name, "rhods-operator-") {
72+
test.Expect(pod.Spec.Containers).NotTo(gomega.BeEmpty(),
73+
"rhods-operator pod %s has no containers", pod.Name)
74+
image := pod.Spec.Containers[0].Image
75+
parts := strings.SplitN(image, "/", 3)
76+
if len(parts) >= 3 {
77+
prefix := parts[0] + "/" + parts[1]
78+
test.T().Logf("Detected operator image prefix: %s", prefix)
79+
return prefix
80+
}
81+
}
82+
}
83+
84+
test.T().Fatal("No running rhods-operator pod found in redhat-ods-operator namespace")
85+
return ""
86+
}
87+
88+
// GetExpectedRegistry returns the expected container registry
89+
// ("registry.redhat.io" for RHOAI builds, "quay.io" for ODH builds).
5890
func GetExpectedRegistry(test Test) string {
59-
return "registry.redhat.io"
91+
test.T().Helper()
92+
prefix := GetExpectedImagePrefix(test)
93+
return strings.SplitN(prefix, "/", 2)[0]
6094
}

tests/kfto/kfto_smoke_test.go

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,11 @@ func TestMain(m *testing.M) {
7575

7676
func TestKftoSmoke(t *testing.T) {
7777
Tags(t, Smoke)
78-
runSmoke(t, "kubeflow-training-operator", "odh-training-operator")
78+
runSmoke(t, "kubeflow-training-operator", "odh-training-operator", "training-operator")
7979
}
8080

81-
// runSmoke runs a smoke test for a given deployment and expected image name.
82-
func runSmoke(t *testing.T, deploymentName string, expectedImage string) {
81+
// runSmoke runs a smoke test for a given deployment and expected image names.
82+
func runSmoke(t *testing.T, deploymentName, rhoaiImage, odhImage string) {
8383
test := With(t)
8484
namespace, err := GetApplicationsNamespaceFromDSCI(test, DefaultDSCIName)
8585
test.Expect(err).NotTo(HaveOccurred())
@@ -96,10 +96,14 @@ func runSmoke(t *testing.T, deploymentName string, expectedImage string) {
9696

9797
test.T().Logf("%s deployment is available", deploymentName)
9898

99-
// Determine registry based on cluster environment
100-
registryName := GetExpectedRegistry(test)
99+
imagePrefix := GetExpectedImagePrefix(test)
101100

102-
test.T().Logf("Verifying %s container image is referred from expected registry ...", deploymentName)
101+
expectedImage := rhoaiImage
102+
if strings.HasPrefix(imagePrefix, "quay.io") {
103+
expectedImage = odhImage
104+
}
105+
106+
test.T().Logf("Verifying %s container image is referred from expected prefix %s ...", deploymentName, imagePrefix)
103107

104108
// List all running pods in the namespace
105109
podList := GetPods(test, namespace, metav1.ListOptions{
@@ -125,7 +129,7 @@ func runSmoke(t *testing.T, deploymentName string, expectedImage string) {
125129
}
126130

127131
containerImage := matchedPods[0].Spec.Containers[0].Image
128-
test.Expect(containerImage).To(ContainSubstring(registryName + "/rhoai/" + expectedImage))
129-
test.T().Logf("%s container image is referred from %s", deploymentName, registryName)
132+
test.Expect(containerImage).To(ContainSubstring(imagePrefix + "/" + expectedImage))
133+
test.T().Logf("%s container image is referred from %s", deploymentName, imagePrefix)
130134

131135
}

tests/trainer/cluster_training_runtimes_test.go

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,7 @@ func TestDefaultClusterTrainingRuntimes(t *testing.T) {
3535
Tags(t, Smoke)
3636
test := With(t)
3737

38-
// Determine registry based on cluster environment
39-
registryName := GetExpectedRegistry(test)
38+
imagePrefix := GetExpectedImagePrefix(test)
4039

4140
// Build a map of expected runtimes for quick lookup
4241
expectedRuntimeMap := make(map[string]trainerutils.ClusterTrainingRuntime)
@@ -84,8 +83,7 @@ func TestDefaultClusterTrainingRuntimes(t *testing.T) {
8483
test.Expect(foundImage).NotTo(BeEmpty(), "No container image found in ClusterTrainingRuntime %s", runtime.Name)
8584
test.T().Logf("Image referred in ClusterTrainingRuntime is %s", foundImage)
8685

87-
// Verify image based on environment
88-
expectedImage := registryName + "/rhoai/" + expectedRuntime.RHOAIImage
86+
expectedImage := imagePrefix + "/" + expectedRuntime.Image
8987
test.Expect(foundImage).To(ContainSubstring(expectedImage),
9088
"Image %s should contain %s", foundImage, expectedImage)
9189
test.T().Logf("ClusterTrainingRuntime '%s' uses expected image: %s", expectedRuntime.Name, expectedImage)
@@ -154,11 +152,11 @@ func TestRunTrainJobWithDefaultClusterTrainingRuntimes(t *testing.T) {
154152
// Run one TrainJob per unique image to avoid redundant runs for CTRs that share the same image
155153
tested := make(map[string]bool)
156154
for _, runtime := range trainerutils.ExpectedRuntimes {
157-
if tested[runtime.RHOAIImage] {
158-
test.T().Logf("Skipping ClusterTrainingRuntime '%s' (image '%s' already tested)", runtime.Name, runtime.RHOAIImage)
155+
if tested[runtime.Image] {
156+
test.T().Logf("Skipping ClusterTrainingRuntime '%s' (image '%s' already tested)", runtime.Name, runtime.Image)
159157
continue
160158
}
161-
tested[runtime.RHOAIImage] = true
159+
tested[runtime.Image] = true
162160

163161
test.T().Logf("Running TrainJob with ClusterTrainingRuntime: %s", runtime.Name)
164162

@@ -222,8 +220,7 @@ func createTrainJob(test Test, namespace, runtimeName string) *trainerv1alpha1.T
222220
}
223221

224222
func verifyPodContainerImages(test Test, namespace, trainJobName string) {
225-
// Determine registry based on cluster environment
226-
registryName := GetExpectedRegistry(test)
223+
imagePrefix := GetExpectedImagePrefix(test)
227224

228225
product, err := GetProduct(test)
229226
test.Expect(err).NotTo(HaveOccurred(), "Failed to get product")
@@ -244,7 +241,7 @@ func verifyPodContainerImages(test Test, namespace, trainJobName string) {
244241
test.Expect(images).NotTo(BeEmpty(), "No container images found for Pod %s", pod.Name)
245242

246243
for _, image := range images {
247-
test.Expect(image).To(HavePrefix(registryName), "Image %s should have registry prefix %s", image, registryName)
244+
test.Expect(image).To(HavePrefix(imagePrefix), "Image %s should have prefix %s", image, imagePrefix)
248245
test.Expect(image).To(MatchRegexp(`@sha256:[a-f0-9]{64}$`),
249246
"Image %s should be SHA-based with valid digest", image)
250247

tests/trainer/trainer_smoke_test.go

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@ import (
1818

1919
func TestKubeflowTrainerSmoke(t *testing.T) {
2020
Tags(t, Smoke)
21-
runSmoke(t, "kubeflow-trainer-controller-manager", "odh-trainer")
21+
runSmoke(t, "kubeflow-trainer-controller-manager", "odh-trainer", "trainer")
2222
}
2323

2424
// runSmoke runs a smoke test for a given deployment and expected image names.
25-
func runSmoke(t *testing.T, deploymentName string, expectedImage string) {
25+
func runSmoke(t *testing.T, deploymentName, rhoaiImage, odhImage string) {
2626
test := With(t)
2727
namespace, err := GetApplicationsNamespaceFromDSCI(test, DefaultDSCIName)
2828
test.Expect(err).NotTo(HaveOccurred())
@@ -39,10 +39,14 @@ func runSmoke(t *testing.T, deploymentName string, expectedImage string) {
3939

4040
test.T().Logf("%s deployment is available", deploymentName)
4141

42-
// Determine registry based on cluster environment
43-
registryName := GetExpectedRegistry(test)
42+
imagePrefix := GetExpectedImagePrefix(test)
4443

45-
test.T().Logf("Verifying %s container image is referred from expected registry ...", deploymentName)
44+
expectedImage := rhoaiImage
45+
if strings.HasPrefix(imagePrefix, "quay.io") {
46+
expectedImage = odhImage
47+
}
48+
49+
test.T().Logf("Verifying %s container image is referred from expected prefix %s ...", deploymentName, imagePrefix)
4650

4751
// List all running pods in the namespace
4852
podList := GetPods(test, namespace, metav1.ListOptions{
@@ -69,6 +73,6 @@ func runSmoke(t *testing.T, deploymentName string, expectedImage string) {
6973
}
7074

7175
containerImage := matchedPods[0].Spec.Containers[0].Image
72-
test.Expect(containerImage).To(ContainSubstring(registryName + "/rhoai/" + expectedImage))
73-
test.T().Logf("%s container image is referred from %s", deploymentName, registryName)
76+
test.Expect(containerImage).To(ContainSubstring(imagePrefix + "/" + expectedImage))
77+
test.T().Logf("%s container image is referred from %s", deploymentName, imagePrefix)
7478
}

tests/trainer/utils/utils_runtimes.go

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,10 @@ import (
2525
"github.com/opendatahub-io/distributed-workloads/tests/common/support"
2626
)
2727

28-
// ClusterTrainingRuntime represents a ClusterTrainingRuntime with its expected RHOAI image
28+
// ClusterTrainingRuntime represents a ClusterTrainingRuntime with its expected image name
2929
type ClusterTrainingRuntime struct {
30-
Name string
31-
RHOAIImage string
30+
Name string
31+
Image string
3232
}
3333

3434
const (
@@ -88,21 +88,21 @@ var TrainingHubToDefaultClusterRuntime = map[string]string{
8888

8989
// ExpectedRuntimes is the list of expected ClusterTrainingRuntimes on the cluster
9090
var ExpectedRuntimes = []ClusterTrainingRuntime{
91-
{Name: DefaultClusterTrainingRuntimeCUDA, RHOAIImage: "odh-th06-cuda130-torch291-py312"},
92-
{Name: DefaultClusterTrainingRuntimeROCm, RHOAIImage: "odh-th06-rocm64-torch291-py312"},
93-
{Name: DefaultClusterTrainingRuntimeCPU, RHOAIImage: "odh-th06-cpu-torch291-py312"},
94-
{Name: "torch-distributed-cuda128-torch29-py312", RHOAIImage: "odh-training-cuda128-torch29-py312"},
95-
{Name: "torch-distributed-rocm64-torch29-py312", RHOAIImage: "odh-training-rocm64-torch29-py312"},
96-
{Name: "torch-distributed-cuda130-torch291-py312", RHOAIImage: "odh-th06-cuda130-torch291-py312"},
97-
{Name: "torch-distributed-rocm64-torch291-py312", RHOAIImage: "odh-th06-rocm64-torch291-py312"},
98-
{Name: "torch-distributed-cpu-torch291-py312", RHOAIImage: "odh-th06-cpu-torch291-py312"},
99-
{Name: DefaultTrainingHubRuntimeCUDA, RHOAIImage: "odh-th06-cuda130-torch291-py312"},
100-
{Name: DefaultTrainingHubRuntimeCPU, RHOAIImage: "odh-th06-cpu-torch291-py312"},
101-
{Name: DefaultTrainingHubRuntimeROCm, RHOAIImage: "odh-th06-rocm64-torch291-py312"},
102-
{Name: "training-hub-th05-cuda128-torch29-py312", RHOAIImage: "odh-training-cuda128-torch29-py312"},
103-
{Name: "training-hub-th06-cuda130-torch291-py312", RHOAIImage: "odh-th06-cuda130-torch291-py312"},
104-
{Name: "training-hub-th06-cpu-torch291-py312", RHOAIImage: "odh-th06-cpu-torch291-py312"},
105-
{Name: "training-hub-th06-rocm64-torch291-py312", RHOAIImage: "odh-th06-rocm64-torch291-py312"},
91+
{Name: DefaultClusterTrainingRuntimeCUDA, Image: "odh-th06-cuda130-torch291-py312"},
92+
{Name: DefaultClusterTrainingRuntimeROCm, Image: "odh-th06-rocm64-torch291-py312"},
93+
{Name: DefaultClusterTrainingRuntimeCPU, Image: "odh-th06-cpu-torch291-py312"},
94+
{Name: "torch-distributed-cuda128-torch29-py312", Image: "odh-training-cuda128-torch29-py312"},
95+
{Name: "torch-distributed-rocm64-torch29-py312", Image: "odh-training-rocm64-torch29-py312"},
96+
{Name: "torch-distributed-cuda130-torch291-py312", Image: "odh-th06-cuda130-torch291-py312"},
97+
{Name: "torch-distributed-rocm64-torch291-py312", Image: "odh-th06-rocm64-torch291-py312"},
98+
{Name: "torch-distributed-cpu-torch291-py312", Image: "odh-th06-cpu-torch291-py312"},
99+
{Name: DefaultTrainingHubRuntimeCUDA, Image: "odh-th06-cuda130-torch291-py312"},
100+
{Name: DefaultTrainingHubRuntimeCPU, Image: "odh-th06-cpu-torch291-py312"},
101+
{Name: DefaultTrainingHubRuntimeROCm, Image: "odh-th06-rocm64-torch291-py312"},
102+
{Name: "training-hub-th05-cuda128-torch29-py312", Image: "odh-training-cuda128-torch29-py312"},
103+
{Name: "training-hub-th06-cuda130-torch291-py312", Image: "odh-th06-cuda130-torch291-py312"},
104+
{Name: "training-hub-th06-cpu-torch291-py312", Image: "odh-th06-cpu-torch291-py312"},
105+
{Name: "training-hub-th06-rocm64-torch291-py312", Image: "odh-th06-rocm64-torch291-py312"},
106106
}
107107

108108
// GetImageFromClusterTrainingRuntime retrieves the container image from the named ClusterTrainingRuntime

0 commit comments

Comments
 (0)