diff --git a/charts/kthena/charts/workload/crds/workload.serving.volcano.sh_modelservings.yaml b/charts/kthena/charts/workload/crds/workload.serving.volcano.sh_modelservings.yaml index 7d871638f..84d4361a8 100644 --- a/charts/kthena/charts/workload/crds/workload.serving.volcano.sh_modelservings.yaml +++ b/charts/kthena/charts/workload/crds/workload.serving.volcano.sh_modelservings.yaml @@ -8857,6 +8857,7 @@ spec: format: int32 type: integer workerReplicas: + default: 0 description: |- WorkerReplicas defines the number for the worker pod of a role. Required: Need to set the number of worker-pod replicas. @@ -17495,7 +17496,6 @@ spec: required: - entryTemplate - name - - workerReplicas type: object maxItems: 4 minItems: 1 diff --git a/docs/kthena/docs/reference/crd/workload.serving.volcano.sh.md b/docs/kthena/docs/reference/crd/workload.serving.volcano.sh.md index d15403c46..5f9e65e34 100644 --- a/docs/kthena/docs/reference/crd/workload.serving.volcano.sh.md +++ b/docs/kthena/docs/reference/crd/workload.serving.volcano.sh.md @@ -661,7 +661,7 @@ _Appears in:_ | `name` _string_ | The name of a role. Name must be unique within an ServingGroup | | MaxLength: 12
Pattern: `^[a-zA-Z0-9]([-a-zA-Z0-9]*[a-zA-Z0-9])?$`
| | `replicas` _integer_ | The number of a certain role.
For example, in Disaggregated Prefilling, setting the replica count for both the P and D roles to 1 results in 1P1D deployment configuration.
This approach can similarly be applied to configure a xPyD deployment scenario.
Default to 1. | 1 | | | `entryTemplate` _[PodTemplateSpec](#podtemplatespec)_ | EntryTemplate defines the template for the entry pod of a role.
Required: Currently, a role must have only one entry-pod. | | | -| `workerReplicas` _integer_ | WorkerReplicas defines the number for the worker pod of a role.
Required: Need to set the number of worker-pod replicas. | | | +| `workerReplicas` _integer_ | WorkerReplicas defines the number for the worker pod of a role.
Optional: the number of worker-pod replicas; defaults to 0 when omitted. | 0 | | | `workerTemplate` _[PodTemplateSpec](#podtemplatespec)_ | WorkerTemplate defines the template for the worker pod of a role. | | | diff --git a/pkg/apis/workload/v1alpha1/servinggroup_types.go b/pkg/apis/workload/v1alpha1/servinggroup_types.go index c4cf2d4af..a357bb391 100644 --- a/pkg/apis/workload/v1alpha1/servinggroup_types.go +++ b/pkg/apis/workload/v1alpha1/servinggroup_types.go @@ -72,7 +72,9 @@ type Role struct { // WorkerReplicas defines the number for the worker pod of a role. // Required: Need to set the number of worker-pod replicas. - WorkerReplicas int32 `json:"workerReplicas"` + // +optional + // +kubebuilder:default=0 + WorkerReplicas int32 `json:"workerReplicas,omitempty"` // WorkerTemplate defines the template for the worker pod of a role. // +optional diff --git a/pkg/model-serving-controller/controller/model_serving_controller_test.go b/pkg/model-serving-controller/controller/model_serving_controller_test.go index 97548c946..2e8c30499 100644 --- a/pkg/model-serving-controller/controller/model_serving_controller_test.go +++ b/pkg/model-serving-controller/controller/model_serving_controller_test.go @@ -3765,10 +3765,8 @@ func TestManageHeadlessService(t *testing.T) { Template: workloadv1alpha1.ServingGroup{ Roles: []workloadv1alpha1.Role{ { - Name: "prefill", - Replicas: ptr.To[int32](1), - WorkerReplicas: 0, - WorkerTemplate: nil, // No worker template + Name: "prefill", + Replicas: ptr.To[int32](1), }, }, },