# Diff view: 2 files changed (+952 -0) under notebooks/ray-experiments
# AppWrapper (mcad.ibm.com/v1beta1) named "finetuneflan" in namespace
# "default". It wraps two generic items:
#   1. a RayCluster (one head pod, one worker group of two GPU pods), and
#   2. an OpenShift Route targeting the "dashboard" port of the
#      finetuneflan-head-svc Service.
---
apiVersion: mcad.ibm.com/v1beta1
kind: AppWrapper
metadata:
  name: finetuneflan
  namespace: default
spec:
  priority: 9
  resources:
    GenericItems:
      # Item 1: the RayCluster.
      - custompodresources:
          # Same figures as the ray-head container resources below
          # (1 replica, no GPU).
          - limits:
              cpu: 2
              memory: 8G
              nvidia.com/gpu: 0
            replicas: 1
            requests:
              cpu: 2
              memory: 8G
              nvidia.com/gpu: 0
          # Same figures as the worker container resources below
          # (2 replicas, 1 GPU each).
          - limits:
              cpu: 2
              memory: 8G
              nvidia.com/gpu: 1
            replicas: 2
            requests:
              cpu: 1
              memory: 2G
              nvidia.com/gpu: 1
        generictemplate:
          apiVersion: ray.io/v1alpha1
          kind: RayCluster
          metadata:
            labels:
              appwrapper.mcad.ibm.com: finetuneflan
              controller-tools.k8s.io: '1.0'
            name: finetuneflan
            namespace: default
          spec:
            autoscalerOptions:
              idleTimeoutSeconds: 60
              imagePullPolicy: Always
              resources:
                limits:
                  cpu: 500m
                  memory: 512Mi
                requests:
                  cpu: 500m
                  memory: 512Mi
              upscalingMode: Default
            enableInTreeAutoscaling: false
            headGroupSpec:
              # Values are quoted so they reach `ray start` as strings.
              rayStartParams:
                block: 'true'
                dashboard-host: '0.0.0.0'
                num-gpus: '0'
              serviceType: ClusterIP
              template:
                spec:
                  containers:
                    - env:
                        - name: MY_POD_IP
                          valueFrom:
                            fieldRef:
                              fieldPath: status.podIP
                      image: ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103
                      imagePullPolicy: Always
                      lifecycle:
                        preStop:
                          exec:
                            command:
                              - /bin/sh
                              - '-c'
                              - ray stop
                      name: ray-head
                      ports:
                        - containerPort: 6379
                          name: gcs
                        - containerPort: 8265
                          name: dashboard
                        - containerPort: 10001
                          name: client
                      resources:
                        limits:
                          cpu: 2
                          memory: 8G
                          nvidia.com/gpu: 0
                        requests:
                          cpu: 2
                          memory: 8G
                          nvidia.com/gpu: 0
            # Matches the Ray release named in the image tags (ray2.1.0).
            # The previous value, 1.12.0, matched the PyTorch version in
            # the tag rather than the Ray version.
            rayVersion: '2.1.0'
            workerGroupSpecs:
              - groupName: small-group-finetuneflan
                # min == max == replicas: the group size is fixed at 2.
                maxReplicas: 2
                minReplicas: 2
                rayStartParams:
                  block: 'true'
                  num-gpus: '1'
                replicas: 2
                template:
                  metadata:
                    annotations:
                      key: value
                    labels:
                      key: value
                  spec:
                    containers:
                      - env:
                          - name: MY_POD_IP
                            valueFrom:
                              fieldRef:
                                fieldPath: status.podIP
                        image: ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103
                        lifecycle:
                          preStop:
                            exec:
                              command:
                                - /bin/sh
                                - '-c'
                                - ray stop
                        name: machine-learning
                        resources:
                          limits:
                            cpu: 2
                            memory: 8G
                            nvidia.com/gpu: 1
                          requests:
                            cpu: 1
                            memory: 2G
                            nvidia.com/gpu: 1
                    initContainers:
                      # Loops until nslookup resolves
                      # $RAY_IP.<namespace>.svc.cluster.local, retrying
                      # every 2 seconds. RAY_IP is not set in this
                      # manifest -- presumably injected by the operator;
                      # TODO confirm.
                      - command:
                          - sh
                          - '-c'
                          - >-
                            until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local;
                            do echo waiting for myservice; sleep 2; done
                        image: busybox:1.28
                        name: init-myservice
        replicas: 1
      # Item 2: Route for the Ray dashboard.
      - generictemplate:
          apiVersion: route.openshift.io/v1
          kind: Route
          metadata:
            labels:
              odh-ray-cluster-service: finetuneflan-head-svc
            name: ray-dashboard-finetuneflan
            namespace: default
          spec:
            port:
              targetPort: dashboard
            to:
              kind: Service
              name: finetuneflan-head-svc
        # Was spelled `replica`; renamed to match the `replicas` key used
        # by the first GenericItems entry.
        replicas: 1
    Items: []
# Page footer: "You can't perform that action at this time." (0 commit comments)