Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ require (
)

require (
dario.cat/mergo v1.0.2 // indirect
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect
github.com/MakeNowJust/heredoc v1.0.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 37 additions & 2 deletions kubectl-plugin/pkg/util/generation/generation.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"strconv"
"strings"

"dario.cat/mergo"
"gopkg.in/yaml.v2"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
Expand Down Expand Up @@ -444,11 +445,45 @@ func ParseConfigFile(filePath string) (*RayClusterConfig, error) {
return nil, fmt.Errorf("failed to read config file: %w", err)
}

config := newRayClusterConfigWithDefaults()
if err := yaml.UnmarshalStrict(data, &config); err != nil {
var overrideConfig RayClusterConfig
if err := yaml.UnmarshalStrict(data, &overrideConfig); err != nil {
return nil, fmt.Errorf("failed to parse YAML: %w", err)
}
config, err := mergeWithDefaultConfig(&overrideConfig)
if err != nil {
return nil, err
}
return config, nil
}

func mergeWithDefaultConfig(overrideConfig *RayClusterConfig) (*RayClusterConfig, error) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this function be simplified a bit ? It seems not need to handle some merges manually.

https://go.dev/play/p/KqVRVqXQVJO

Copy link
Contributor Author

@CheyuWu CheyuWu Jul 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @fscnick thanks for reviewing
I initially considered using mergo.Merge(config, &overrideConfig, mergo.WithOverride), but it introduces an unintended side effect when merging slice fields like worker-groups.

For example, consider the following minimal config.yaml:

worker-groups:
  - cpu: 3

If we directly call:

config := newRayClusterConfigWithDefaults()
err := mergo.Merge(config, &overrideConfig, mergo.WithOverride)

Then overrideConfig.WorkerGroups will completely overwrite the default WorkerGroups defined in config, instead of preserving and partially overriding fields (e.g., memory limits or container settings). This leads to missing default values in the final rendered output. As a result, the merged config will produce:

resources:
  limits: {}
  requests:
    cpu: "3"

But the expected output should retain default memory settings like:

resources:
  limits:
    memory: 4Gi
  requests:
    cpu: "3"
    memory: 4Gi

So, I used a more sophisticated approach to handle this.

Copy link
Contributor Author

@CheyuWu CheyuWu Jul 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we omit mergo.WithOverride and simply call:

config := newRayClusterConfigWithDefaults()
err := mergo.Merge(config, &overrideConfig)

Then the fields in overrideConfig will only be merged if they are not already set in config. As a result, the WorkerGroups defined in config.yaml will be ignored entirely if the default already includes a value. This leads to the user-provided override being silently ignored.

For example, the output might look like this:

containers:
- image: rayproject/ray:2.46.0
  name: ray-worker
  resources:
    limits:
      memory: 4Gi
    requests:
      cpu: "2"
      memory: 4Gi

Here, the cpu: 3 specified by the user in config.yaml is not reflected in the final output — the default cpu: 2 remains.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are three thoughts about handling this slice.

  • detach the WorkerGroups, merge it manually and attach back to config.
// detach WorkerGroups
overrideConfigWG := overrideConfig.WorkerGroups
overrideConfig.WorkerGroups = nil

// merge config and overrideConfig
...

// merge WorkerGroups
...

// put back
config.WorkerGroups = mergedWorkerGroups
  • use Transformer in mergo to skip WorkerGroups and merge it manually.
  • use Transformer in mergo to merge it directly.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great idea !! thx

// detach worker groups from default config
overrideConfigWG := overrideConfig.WorkerGroups
overrideConfig.WorkerGroups = nil

config := newRayClusterConfigWithDefaults()
err := mergo.Merge(config, overrideConfig, mergo.WithOverride)
if err != nil {
return nil, fmt.Errorf("failed to merge config with defaults: %w", err)
}
// merge WorkerGroups and keep the default values for missing fields
// if overrideConfigWG is not nil, we will merge the worker groups from the config file
// and keep the default values for missing fields
if overrideConfigWG != nil {
for len(config.WorkerGroups) < len(overrideConfigWG) {
config.WorkerGroups = append(config.WorkerGroups, WorkerGroup{
Replicas: util.DefaultWorkerReplicas,
CPU: ptr.To(util.DefaultWorkerCPU),
Memory: ptr.To(util.DefaultWorkerMemory),
})
}
for i, workerGroup := range overrideConfigWG {
err := mergo.Merge(&config.WorkerGroups[i], workerGroup, mergo.WithOverride)
if err != nil {
return nil, fmt.Errorf("failed to merge worker group %d: %w", i, err)
}
}
}
return config, nil
}

Expand Down
142 changes: 142 additions & 0 deletions kubectl-plugin/pkg/util/generation/generation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -875,8 +875,11 @@ func TestParseConfigFile(t *testing.T) {
},
WorkerGroups: []WorkerGroup{
{
Name: ptr.To("default-group"),
Replicas: int32(1),
CPU: ptr.To("2"),
GPU: ptr.To("1"),
Memory: ptr.To("4Gi"),
},
},
},
Expand Down Expand Up @@ -984,6 +987,145 @@ gke:
},
},
},
"override ray-version, image, service-account": {
config: `
ray-version: 4.16.0
image: custom/image:tag
service-account: svcacct
`,
expected: &RayClusterConfig{
RayVersion: ptr.To("4.16.0"),
Image: ptr.To("custom/image:tag"),
ServiceAccount: ptr.To("svcacct"),
Head: &Head{
CPU: ptr.To("2"),
Memory: ptr.To("4Gi"),
},
WorkerGroups: []WorkerGroup{
{
Name: ptr.To("default-group"),
Replicas: 1,
CPU: ptr.To("2"),
Memory: ptr.To("4Gi"),
},
},
},
},
"override only head CPU": {
config: `
head:
cpu: "8"
`,
expected: &RayClusterConfig{
RayVersion: ptr.To(util.RayVersion),
Image: ptr.To(fmt.Sprintf("rayproject/ray:%s", util.RayVersion)),
Head: &Head{
CPU: ptr.To("8"),
Memory: ptr.To("4Gi"),
},
WorkerGroups: []WorkerGroup{
{
Name: ptr.To("default-group"),
Replicas: 1,
CPU: ptr.To("2"),
Memory: ptr.To("4Gi"),
},
},
},
},
"override worker group with only CPU": {
config: `
worker-groups:
- cpu: "1"
`,
expected: &RayClusterConfig{
RayVersion: ptr.To(util.RayVersion),
Image: ptr.To(fmt.Sprintf("rayproject/ray:%s", util.RayVersion)),
Head: &Head{
CPU: ptr.To("2"),
Memory: ptr.To("4Gi"),
},
WorkerGroups: []WorkerGroup{
{
Name: ptr.To("default-group"),
Replicas: 1,
CPU: ptr.To("1"),
Memory: ptr.To("4Gi"),
},
},
},
},
"override worker group with empty name": {
config: `
worker-groups:
- name: ""
replicas: 2
`,
expected: &RayClusterConfig{
RayVersion: ptr.To(util.RayVersion),
Image: ptr.To(fmt.Sprintf("rayproject/ray:%s", util.RayVersion)),
Head: &Head{
CPU: ptr.To("2"),
Memory: ptr.To("4Gi"),
},
WorkerGroups: []WorkerGroup{
{
Name: ptr.To("default-group"),
Replicas: 2,
CPU: ptr.To("2"),
Memory: ptr.To("4Gi"),
},
},
},
},
"override worker group with replicas = 0": {
config: `
worker-groups:
- name: "wg1"
replicas: 0
`,
expected: &RayClusterConfig{
RayVersion: ptr.To(util.RayVersion),
Image: ptr.To(fmt.Sprintf("rayproject/ray:%s", util.RayVersion)),
Head: &Head{
CPU: ptr.To("2"),
Memory: ptr.To("4Gi"),
},
WorkerGroups: []WorkerGroup{
{
Name: ptr.To("wg1"),
Replicas: 1, // fallback to default
CPU: ptr.To("2"),
Memory: ptr.To("4Gi"),
},
},
},
},
"override autoscaler": {
config: `
autoscaler:
version: v2
`,
expected: &RayClusterConfig{
RayVersion: ptr.To(util.RayVersion),
Image: ptr.To(fmt.Sprintf("rayproject/ray:%s", util.RayVersion)),
Head: &Head{
CPU: ptr.To("2"),
Memory: ptr.To("4Gi"),
},
WorkerGroups: []WorkerGroup{
{
Name: ptr.To("default-group"),
Replicas: 1,
CPU: ptr.To("2"),
Memory: ptr.To("4Gi"),
},
},
Autoscaler: &Autoscaler{
Version: AutoscalerV2,
},
},
},
}

for name, test := range tests {
Expand Down
Loading