Skip to content

Commit 1bdbacf

Browse files
sutaakar authored and openshift-merge-robot committed
Convert MNIST PyTorch and MNIST Ray test cases to go
1 parent 8b1e171 commit 1bdbacf

File tree

16 files changed

+924
-15
lines changed

16 files changed

+924
-15
lines changed

tests/go.mod

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
module github.com/opendatahub-io/distributed-workloads/tests/new-tests
1+
module github.com/opendatahub-io/distributed-workloads/tests
22

33
require (
44
github.com/onsi/gomega v1.27.10
55
github.com/openshift/api v0.0.0-20230718161610-2a3e8b481cec
6-
github.com/project-codeflare/codeflare-operator v0.2.3
6+
github.com/project-codeflare/codeflare-operator v0.2.4-0.20230913142530-526bb53289e1
7+
github.com/project-codeflare/multi-cluster-app-dispatcher v1.34.1
78
github.com/ray-project/kuberay/ray-operator v0.0.0-20230908233208-a8f730e5a2b6
89
k8s.io/api v0.27.2
910
k8s.io/apimachinery v0.27.2
@@ -30,7 +31,6 @@ require (
3031
github.com/modern-go/reflect2 v1.0.2 // indirect
3132
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
3233
github.com/openshift/client-go v0.0.0-20230718165156-6014fb98e86a // indirect
33-
github.com/project-codeflare/multi-cluster-app-dispatcher v1.34.1 // indirect
3434
github.com/spf13/pflag v1.0.5 // indirect
3535
github.com/stretchr/testify v1.8.4 // indirect
3636
golang.org/x/net v0.12.0 // indirect

tests/go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,8 @@ github.com/openshift/client-go v0.0.0-20230718165156-6014fb98e86a h1:ZKewwwEIURD
107107
github.com/openshift/client-go v0.0.0-20230718165156-6014fb98e86a/go.mod h1:EjhPQjEm8HM3GThz5ywNGLEec1P1IjTn08kwzdvupvA=
108108
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
109109
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
110-
github.com/project-codeflare/codeflare-operator v0.2.3 h1:aVsJD519hBjFoftSFlVjEzySM7JjoKgRGhxLwy88YKE=
111-
github.com/project-codeflare/codeflare-operator v0.2.3/go.mod h1:6J91NMtSthXp/gFTl1CDlyJo+rhBPQ+jc5OagdEqaVk=
110+
github.com/project-codeflare/codeflare-operator v0.2.4-0.20230913142530-526bb53289e1 h1:1iBWbUlDja0qpMnyH+u8uL0qEjlxMfldFxygFgyViqU=
111+
github.com/project-codeflare/codeflare-operator v0.2.4-0.20230913142530-526bb53289e1/go.mod h1:6J91NMtSthXp/gFTl1CDlyJo+rhBPQ+jc5OagdEqaVk=
112112
github.com/project-codeflare/multi-cluster-app-dispatcher v1.34.1 h1:ZNQ/JPdjS6CtaAzt6SNqaoWcpwS1PyVdgZlmIYikPLI=
113113
github.com/project-codeflare/multi-cluster-app-dispatcher v1.34.1/go.mod h1:Yge6GRNpO9YIDfeL+XOcCE9xbmfCTD5C1h5dlW87mxQ=
114114
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=

tests/integration/mcad_ray_test.go

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
/*
2+
Copyright 2023.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package integration
18+
19+
import (
20+
"testing"
21+
22+
. "github.com/onsi/gomega"
23+
cfosupport "github.com/project-codeflare/codeflare-operator/test/support"
24+
mcadv1beta1 "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/apis/controller/v1beta1"
25+
rayv1alpha1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1alpha1"
26+
27+
corev1 "k8s.io/api/core/v1"
28+
rbacv1 "k8s.io/api/rbac/v1"
29+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
30+
31+
support "github.com/opendatahub-io/distributed-workloads/tests/integration/support"
32+
)
33+
34+
// TestMCADRay exercises the MNIST Ray notebook flow: it stores the notebook,
// the training script and the requirements file in an immutable ConfigMap,
// creates RBAC and a Notebook through the local support helpers, and then
// waits for a single "mnistjob" AppWrapper to become active and finally
// disappear from the test namespace.
//
// NOTE: the test currently calls Skip right after creating the test handle,
// referencing codeflare-sdk issue 190.
func TestMCADRay(t *testing.T) {
35+
test := cfosupport.With(t)
36+
37+
test.T().Skip("Requires https://github.com/project-codeflare/codeflare-sdk/issues/190")
38+
39+
// Create the namespace used by every resource in this test
40+
namespace := test.NewTestNamespace()
41+
42+
// Test configuration: the notebook file name doubles as the ConfigMap key
43+
jupyterNotebookConfigMapFileName := "mnist_ray_mini.ipynb"
44+
config := &corev1.ConfigMap{
45+
TypeMeta: metav1.TypeMeta{
46+
APIVersion: corev1.SchemeGroupVersion.String(),
47+
Kind: "ConfigMap",
48+
},
49+
ObjectMeta: metav1.ObjectMeta{
50+
Name: "notebooks-ray",
51+
},
52+
BinaryData: map[string][]byte{
53+
// MNIST Ray notebook, training script and requirements file
54+
jupyterNotebookConfigMapFileName: ReadFile(test, "resources/mnist_ray_mini.ipynb"),
55+
"mnist.py": ReadFile(test, "resources/mnist.py"),
56+
"requirements.txt": ReadFile(test, "resources/requirements.txt"),
57+
},
58+
Immutable: cfosupport.Ptr(true),
59+
}
60+
config, err := test.Client().Core().CoreV1().ConfigMaps(namespace.Name).Create(test.Ctx(), config, metav1.CreateOptions{})
61+
test.Expect(err).NotTo(HaveOccurred())
62+
test.T().Logf("Created ConfigMap %s/%s successfully", config.Namespace, config.Name)
63+
64+
// Create RBAC and retrieve a token for a user limited to the rules below
65+
policyRules := []rbacv1.PolicyRule{
66+
{
67+
Verbs: []string{"get", "create", "delete", "list", "patch", "update"},
68+
APIGroups: []string{mcadv1beta1.GroupName},
69+
Resources: []string{"appwrappers"},
70+
},
71+
{
72+
Verbs: []string{"get", "list"},
73+
APIGroups: []string{rayv1alpha1.GroupVersion.Group},
74+
Resources: []string{"rayclusters", "rayclusters/status"},
75+
},
76+
{
77+
Verbs: []string{"get", "list"},
78+
APIGroups: []string{"route.openshift.io"},
79+
Resources: []string{"routes"},
80+
},
81+
}
82+
token := support.CreateTestRBAC(test, namespace, policyRules)
83+
84+
// Create the Notebook CR, passing the token, the ConfigMap name and the notebook key
85+
support.CreateNotebook(test, namespace, token, config.Name, jupyterNotebookConfigMapFileName)
86+
87+
// Make sure the AppWrapper is created and running; HaveLen(1) guarantees both ContainElement matchers apply to the same AppWrapper
88+
test.Eventually(cfosupport.AppWrappers(test, namespace), cfosupport.TestTimeoutLong).
89+
Should(
90+
And(
91+
HaveLen(1),
92+
ContainElement(WithTransform(cfosupport.AppWrapperName, HavePrefix("mnistjob"))),
93+
ContainElement(WithTransform(cfosupport.AppWrapperState, Equal(mcadv1beta1.AppWrapperStateActive))),
94+
),
95+
)
96+
97+
// Make sure the AppWrapper finishes and is deleted, i.e. none is left in the namespace
98+
test.Eventually(cfosupport.AppWrappers(test, namespace), cfosupport.TestTimeoutLong).
99+
Should(HaveLen(0))
100+
}
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
/*
2+
Copyright 2023.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package integration
18+
19+
import (
20+
"testing"
21+
22+
. "github.com/onsi/gomega"
23+
cfosupport "github.com/project-codeflare/codeflare-operator/test/support"
24+
mcadv1beta1 "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/apis/controller/v1beta1"
25+
26+
corev1 "k8s.io/api/core/v1"
27+
rbacv1 "k8s.io/api/rbac/v1"
28+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
29+
30+
support "github.com/opendatahub-io/distributed-workloads/tests/integration/support"
31+
)
32+
33+
// TestMnistPyTorchMCAD exercises the MNIST PyTorch MCAD notebook flow: it
// stores the notebook in an immutable ConfigMap, creates RBAC and a Notebook
// through the local support helpers, and then waits for a single "mnistjob"
// AppWrapper to become active and finally disappear from the test namespace.
func TestMnistPyTorchMCAD(t *testing.T) {
34+
test := cfosupport.With(t)
35+
36+
// Create the namespace used by every resource in this test
37+
namespace := test.NewTestNamespace()
38+
39+
// Test configuration: the notebook file name doubles as the ConfigMap key
40+
jupyterNotebookConfigMapFileName := "mnist_mcad_mini.ipynb"
41+
config := &corev1.ConfigMap{
42+
TypeMeta: metav1.TypeMeta{
43+
APIVersion: corev1.SchemeGroupVersion.String(),
44+
Kind: "ConfigMap",
45+
},
46+
ObjectMeta: metav1.ObjectMeta{
47+
Name: "notebooks-mcad",
48+
},
49+
BinaryData: map[string][]byte{
50+
// MNIST MCAD notebook
51+
jupyterNotebookConfigMapFileName: ReadFile(test, "resources/mnist_mcad_mini.ipynb"),
52+
},
53+
Immutable: cfosupport.Ptr(true),
54+
}
55+
config, err := test.Client().Core().CoreV1().ConfigMaps(namespace.Name).Create(test.Ctx(), config, metav1.CreateOptions{})
56+
test.Expect(err).NotTo(HaveOccurred())
57+
test.T().Logf("Created ConfigMap %s/%s successfully", config.Namespace, config.Name)
58+
59+
// Create RBAC and retrieve a token for a user limited to the rules below
60+
policyRules := []rbacv1.PolicyRule{
61+
{
62+
Verbs: []string{"get", "create", "delete", "list", "patch", "update"},
63+
APIGroups: []string{mcadv1beta1.GroupName},
64+
Resources: []string{"appwrappers"},
65+
},
66+
// Read access to pod logs in the core API group, needed for job.logs()
67+
{
68+
Verbs: []string{"get"},
69+
APIGroups: []string{""},
70+
Resources: []string{"pods/log"},
71+
},
72+
}
73+
token := support.CreateTestRBAC(test, namespace, policyRules)
74+
75+
// Create the Notebook CR, passing the token, the ConfigMap name and the notebook key
76+
support.CreateNotebook(test, namespace, token, config.Name, jupyterNotebookConfigMapFileName)
77+
78+
// Make sure the AppWrapper is created and running; HaveLen(1) guarantees both ContainElement matchers apply to the same AppWrapper
79+
test.Eventually(cfosupport.AppWrappers(test, namespace), cfosupport.TestTimeoutLong).
80+
Should(
81+
And(
82+
HaveLen(1),
83+
ContainElement(WithTransform(cfosupport.AppWrapperName, HavePrefix("mnistjob"))),
84+
ContainElement(WithTransform(cfosupport.AppWrapperState, Equal(mcadv1beta1.AppWrapperStateActive))),
85+
),
86+
)
87+
88+
// Make sure the AppWrapper finishes and is deleted, i.e. none is left in the namespace
89+
test.Eventually(cfosupport.AppWrappers(test, namespace), cfosupport.TestTimeoutLong).
90+
Should(HaveLen(0))
91+
}

tests/integration/ray_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ func TestRayCluster(t *testing.T) {
3838
test := support.With(t)
3939
test.T().Parallel()
4040

41+
// This test is unstable. It seems that the RayJob CR sometimes triggers 2 jobs in Ray, causing confusion in the KubeRay operator.
42+
// Needs to be rechecked with a newer KubeRay version. If it is still unstable, it needs to be reported.
43+
test.T().Skip("Requires https://github.com/opendatahub-io/distributed-workloads/issues/65")
44+
4145
// Create a namespace
4246
namespace := test.NewTestNamespace()
4347

tests/integration/resources/mnist.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ def test_dataloader(self):
149149
trainer = Trainer(
150150
accelerator="auto",
151151
# devices=1 if torch.cuda.is_available() else None, # limiting for iPython runs
152-
max_epochs=5,
152+
max_epochs=2,
153153
callbacks=[TQDMProgressBar(refresh_rate=20)],
154154
num_nodes=int(os.environ.get("GROUP_WORLD_SIZE", 1)),
155155
devices=int(os.environ.get("LOCAL_WORLD_SIZE", 1)),
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"id": "b55bc3ea-4ce3-49bf-bb1f-e209de8ca47a",
7+
"metadata": {
8+
"tags": []
9+
},
10+
"outputs": [],
11+
"source": [
12+
"# Import pieces from codeflare-sdk\n",
13+
"from codeflare_sdk.job.jobs import DDPJobDefinition\n",
14+
"from time import sleep"
15+
]
16+
},
17+
{
18+
"cell_type": "code",
19+
"execution_count": null,
20+
"id": "47ca5c15",
21+
"metadata": {
22+
"tags": ["parameters"]
23+
},
24+
"outputs": [],
25+
"source": [
26+
"#parameters\n",
27+
"namespace = \"default\""
28+
]
29+
},
30+
{
31+
"cell_type": "code",
32+
"execution_count": null,
33+
"id": "26b21373",
34+
"metadata": {},
35+
"outputs": [],
36+
"source": [
37+
"job = DDPJobDefinition(name=\"mnistjob\", script=\"mnist.py\", scheduler_args={\"namespace\": namespace}, j=\"1x1\", gpu=0, cpu=1, memMB=2000, image=\"quay.io/project-codeflare/mnist-job-test:v0.0.1\").submit()"
38+
]
39+
},
40+
{
41+
"cell_type": "code",
42+
"execution_count": null,
43+
"id": "d24e9f95",
44+
"metadata": {},
45+
"outputs": [],
46+
"source": [
47+
"finished = False\n",
48+
"while not finished:\n",
49+
" sleep(1)\n",
50+
" try:\n",
51+
" finished = (\"Epoch 4: 100%\" in job.logs())\n",
52+
" except:\n",
53+
" finished = False"
54+
]
55+
},
56+
{
57+
"cell_type": "code",
58+
"execution_count": null,
59+
"id": "f078b7cd",
60+
"metadata": {},
61+
"outputs": [],
62+
"source": [
63+
"job.cancel()"
64+
]
65+
}
66+
],
67+
"metadata": {
68+
"kernelspec": {
69+
"display_name": "Python 3 (ipykernel)",
70+
"language": "python",
71+
"name": "python3"
72+
},
73+
"language_info": {
74+
"codemirror_mode": {
75+
"name": "ipython",
76+
"version": 3
77+
},
78+
"file_extension": ".py",
79+
"mimetype": "text/x-python",
80+
"name": "python",
81+
"nbconvert_exporter": "python",
82+
"pygments_lexer": "ipython3",
83+
"version": "3.9.13"
84+
},
85+
"vscode": {
86+
"interpreter": {
87+
"hash": "f9f85f796d01129d0dd105a088854619f454435301f6ffec2fea96ecbd9be4ac"
88+
}
89+
}
90+
},
91+
"nbformat": 4,
92+
"nbformat_minor": 5
93+
}

0 commit comments

Comments
 (0)