Skip to content

Commit 62831c4

Browse files
authored
Add KubeRay tests for Ray APIs (#249)
* Add KubeRay tests for Ray APIs

  Signed-off-by: Hemil Desai <[email protected]>

* fix

  Signed-off-by: Hemil Desai <[email protected]>

---------

Signed-off-by: Hemil Desai <[email protected]>
1 parent dae14e3 commit 62831c4

File tree

5 files changed

+2269
-0
lines changed

5 files changed

+2269
-0
lines changed
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
---
# RayCluster "ml-training-cluster" in namespace "ml-team": one head pod plus a
# single worker group named "gpu-workers". Both pod templates use the
# custom/ray:gpu image and mount the PVC "data-pvc" at /data.
# NOTE(review): the RayJob manifest in this same commit uses ray.io/v1 --
# confirm that v1alpha1 is intended for this resource.
apiVersion: ray.io/v1alpha1
kind: RayCluster
metadata:
  name: ml-training-cluster
  namespace: ml-team
  labels:
    team: ml
    env: prod
spec:
  # Quoted so the version is always read as a string.
  rayVersion: '2.43.0'
  headGroupSpec:
    serviceType: ClusterIP
    rayStartParams:
      dashboard-host: '0.0.0.0'
      # Passed as a string, not an integer.
      num-cpus: '4'
    template:
      spec:
        containers:
          - image: custom/ray:gpu
            name: ray-head
            ports: []
            env:
              - name: NCCL_DEBUG
                value: INFO
            # preStop hook: run `ray stop` through /bin/sh -c.
            lifecycle:
              preStop:
                exec:
                  command:
                    - /bin/sh
                    - -c
                    - ray stop
            # Head requests and limits are identical (4 CPU, 16Gi).
            resources:
              requests:
                cpu: '4'
                memory: 16Gi
              limits:
                cpu: '4'
                memory: 16Gi
            volumeMounts:
              - name: data
                mountPath: /data
        volumes:
          - name: data
            persistentVolumeClaim:
              claimName: data-pvc
  workerGroupSpecs:
    # replicas is 4, with minReplicas 2 and maxReplicas 8.
    - groupName: gpu-workers
      maxReplicas: 8
      minReplicas: 2
      rayStartParams: {}
      replicas: 4
      template:
        spec:
          containers:
            - image: custom/ray:gpu
              name: ray-worker
              env:
                - name: NCCL_DEBUG
                  value: INFO
              # preStop hook: run `ray stop` through /bin/sh -c.
              lifecycle:
                preStop:
                  exec:
                    command:
                      - /bin/sh
                      - -c
                      - ray stop
              # Only the GPU count appears under limits; cpu and memory are
              # requested but have no limit set.
              resources:
                requests:
                  cpu: '8'
                  memory: 32Gi
                  nvidia.com/gpu: 2
                limits:
                  nvidia.com/gpu: 2
              volumeMounts:
                - name: data
                  mountPath: /data
          volumes:
            - name: data
              persistentVolumeClaim:
                claimName: data-pvc
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
---
# RayCluster "test-cluster" in namespace "default": one head pod plus a worker
# group named "workers", both on the rayproject/ray:2.43.0 image. Labels,
# ports, env, volumeMounts and volumes are all explicitly empty.
# NOTE(review): the RayJob manifest in this same commit uses ray.io/v1 --
# confirm that v1alpha1 is intended for this resource.
apiVersion: ray.io/v1alpha1
kind: RayCluster
metadata:
  name: test-cluster
  namespace: default
  labels: {}
spec:
  # Quoted so the version is always read as a string.
  rayVersion: '2.43.0'
  headGroupSpec:
    serviceType: ClusterIP
    rayStartParams:
      dashboard-host: '0.0.0.0'
    template:
      spec:
        containers:
          - image: rayproject/ray:2.43.0
            name: ray-head
            ports: []
            env: []
            # preStop hook: run `ray stop` through /bin/sh -c.
            lifecycle:
              preStop:
                exec:
                  command:
                    - /bin/sh
                    - -c
                    - ray stop
            # Head requests and limits are identical (1 CPU, 2Gi).
            resources:
              requests:
                cpu: '1'
                memory: 2Gi
              limits:
                cpu: '1'
                memory: 2Gi
            volumeMounts: []
        volumes: []
  workerGroupSpecs:
    # replicas, minReplicas and maxReplicas are all 2.
    - groupName: workers
      maxReplicas: 2
      minReplicas: 2
      rayStartParams: {}
      replicas: 2
      template:
        spec:
          containers:
            - image: rayproject/ray:2.43.0
              name: ray-worker
              env: []
              # preStop hook: run `ray stop` through /bin/sh -c.
              lifecycle:
                preStop:
                  exec:
                    command:
                      - /bin/sh
                      - -c
                      - ray stop
              # Requests only; limits is an explicitly empty mapping.
              resources:
                requests:
                  cpu: '2'
                  memory: 4Gi
                limits: {}
              volumeMounts: []
          volumes: []
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
---
# RayJob "test-job" in namespace "default" with entrypoint `python train.py`.
apiVersion: ray.io/v1
kind: RayJob
metadata:
  name: test-job
  namespace: default
spec:
  entrypoint: python train.py
  shutdownAfterJobFinishes: true
  # The cluster spec is an explicitly empty mapping and the runtime env is an
  # explicit null in this manifest.
  rayClusterSpec: {}
  runtimeEnvYAML: null

test/run/ray/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)