|
| 1 | +/* |
| 2 | +Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. |
| 3 | +
|
| 4 | +Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | +you may not use this file except in compliance with the License. |
| 6 | +You may obtain a copy of the License at |
| 7 | +
|
| 8 | + http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | +
|
| 10 | +Unless required by applicable law or agreed to in writing, software |
| 11 | +distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | +See the License for the specific language governing permissions and |
| 14 | +limitations under the License. |
| 15 | +*/ |
| 16 | + |
| 17 | +package v1 |
| 18 | + |
| 19 | +import ( |
| 20 | + commonv1 "github.com/kubeflow/common/pkg/apis/common/v1" |
| 21 | + autoscalingv2 "k8s.io/api/autoscaling/v2beta2" |
| 22 | + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" |
| 23 | +) |
| 24 | + |
| 25 | +const ( |
| 26 | + // PaddleJobDefaultPortName is name of the port used to communicate between Master and |
| 27 | + // workers. |
| 28 | + PaddleJobDefaultPortName = "master" |
| 29 | + // PaddleJobDefaultContainerName is the name of the PaddleJob container. |
| 30 | + PaddleJobDefaultContainerName = "paddle" |
| 31 | + // PaddleJobDefaultPort is default value of the port. |
| 32 | + PaddleJobDefaultPort = 36543 |
| 33 | + // PaddleJobDefaultRestartPolicy is default RestartPolicy for PaddleReplicaSpec. |
| 34 | + PaddleJobDefaultRestartPolicy = commonv1.RestartPolicyOnFailure |
| 35 | + // PaddleJobKind is the kind name. |
| 36 | + PaddleJobKind = "PaddleJob" |
| 37 | + // PaddleJobPlural is the PaddlePlural for paddleJob. |
| 38 | + PaddleJobPlural = "paddlejobs" |
| 39 | + // PaddleJobSingular is the singular for paddleJob. |
| 40 | + PaddleJobSingular = "paddlejob" |
| 41 | + // PaddleJobFrameworkName is the name of the ML Framework |
| 42 | + PaddleJobFrameworkName = "paddle" |
| 43 | + // PaddleJobReplicaTypeMaster is the type of Master of distributed Paddle |
| 44 | + PaddleJobReplicaTypeMaster commonv1.ReplicaType = "Master" |
| 45 | + // PaddleJobReplicaTypeWorker is the type for workers of distributed Paddle. |
| 46 | + PaddleJobReplicaTypeWorker commonv1.ReplicaType = "Worker" |
| 47 | +) |
| 48 | + |
| 49 | +// +genclient |
| 50 | +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object |
| 51 | +// +resource:path=paddlejob |
| 52 | +//+kubebuilder:object:root=true |
| 53 | +//+kubebuilder:subresource:status |
| 54 | +//+kubebuilder:printcolumn:name="State",type=string,JSONPath=`.status.conditions[-1:].type` |
| 55 | +//+kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` |
| 56 | +// +kubebuilder:subresource:scale:specpath=.spec.paddleReplicaSpecs.Worker.replicas,statuspath=.status.replicaStatuses.Worker.active,selectorpath=.status.replicaStatuses.Worker.selector |
| 57 | + |
| 58 | +// PaddleJob Represents a PaddleJob resource. |
| 59 | +type PaddleJob struct { |
| 60 | + // Standard Kubernetes type metadata. |
| 61 | + metav1.TypeMeta `json:",inline"` |
| 62 | + |
| 63 | + metav1.ObjectMeta `json:"metadata,omitempty"` |
| 64 | + |
| 65 | + // Specification of the desired state of the PaddleJob. |
| 66 | + Spec PaddleJobSpec `json:"spec,omitempty"` |
| 67 | + |
| 68 | + // Most recently observed status of the PaddleJob. |
| 69 | + // Read-only (modified by the system). |
| 70 | + Status commonv1.JobStatus `json:"status,omitempty"` |
| 71 | +} |
| 72 | + |
| 73 | +// PaddleJobSpec is a desired state description of the PaddleJob. |
| 74 | +type PaddleJobSpec struct { |
| 75 | + // RunPolicy encapsulates various runtime policies of the distributed training |
| 76 | + // job, for example how to clean up resources and how long the job can stay |
| 77 | + // active. |
| 78 | + //+kubebuilder:validation:Optional |
| 79 | + RunPolicy commonv1.RunPolicy `json:"runPolicy"` |
| 80 | + |
| 81 | + // ElasticPolicy holds the elastic policy for paddle job. |
| 82 | + ElasticPolicy *PaddleElasticPolicy `json:"elasticPolicy,omitempty"` |
| 83 | + |
| 84 | + // A map of PaddleReplicaType (type) to ReplicaSpec (value). Specifies the Paddle cluster configuration. |
| 85 | + // For example, |
| 86 | + // { |
| 87 | + // "Master": PaddleReplicaSpec, |
| 88 | + // "Worker": PaddleReplicaSpec, |
| 89 | + // } |
| 90 | + PaddleReplicaSpecs map[commonv1.ReplicaType]*commonv1.ReplicaSpec `json:"paddleReplicaSpecs"` |
| 91 | +} |
| 92 | + |
| 93 | +type PaddleElasticPolicy struct { |
| 94 | + // minReplicas is the lower limit for the number of replicas to which the training job |
| 95 | + // can scale down. It defaults to null. |
| 96 | + // +optional |
| 97 | + MinReplicas *int32 `json:"minReplicas,omitempty"` |
| 98 | + // upper limit for the number of pods that can be set by the autoscaler; cannot be smaller than MinReplicas, defaults to null. |
| 99 | + // +optional |
| 100 | + MaxReplicas *int32 `json:"maxReplicas,omitempty"` |
| 101 | + |
| 102 | + // MaxRestarts is the limit for restart times of pods in elastic mode. |
| 103 | + // +optional |
| 104 | + MaxRestarts *int32 `json:"maxRestarts,omitempty"` |
| 105 | + |
| 106 | + // Metrics contains the specifications which are used to calculate the |
| 107 | + // desired replica count (the maximum replica count across all metrics will |
| 108 | + // be used). The desired replica count is calculated with multiplying the |
| 109 | + // ratio between the target value and the current value by the current |
| 110 | + // number of pods. Ergo, metrics used must decrease as the pod count is |
| 111 | + // increased, and vice-versa. See the individual metric source types for |
| 112 | + // more information about how each type of metric must respond. |
| 113 | + // If not set, the HPA will not be created. |
| 114 | + // +optional |
| 115 | + Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"` |
| 116 | +} |
| 117 | + |
| 118 | +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object |
| 119 | +// +resource:path=paddlejobs |
| 120 | +//+kubebuilder:object:root=true |
| 121 | + |
| 122 | +// PaddleJobList is a list of PaddleJobs. |
| 123 | +type PaddleJobList struct { |
| 124 | + // Standard type metadata. |
| 125 | + metav1.TypeMeta `json:",inline"` |
| 126 | + |
| 127 | + // Standard list metadata. |
| 128 | + metav1.ListMeta `json:"metadata,omitempty"` |
| 129 | + |
| 130 | + // List of PaddleJobs. |
| 131 | + Items []PaddleJob `json:"items"` |
| 132 | +} |
0 commit comments