Skip to content

Commit 3d3bfb0

Browse files
authored
Merge pull request #281 from nebius/dev
Soperator release 1.16.1
2 parents 4c06b19 + 5356644 commit 3d3bfb0

33 files changed

+461
-237
lines changed

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.16.0
1+
1.16.1

api/v1/slurmcluster_types.go

Lines changed: 35 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ type SlurmClusterSpec struct {
7777
// SlurmConfig represents the Slurm configuration in slurm.conf. Not all options are supported.
7878
//
7979
// +kubebuilder:validation:Optional
80+
// +kubebuilder:default={defMemPerNode: 1228800, defCpuPerGPU: 16, completeWait: 5, debugFlags: "Cgroup,CPU_Bind,Gres,JobComp,Priority,Script,SelectType,Steps,TraceJobs", taskPluginParam: "Verbose", maxJobCount: 10000, minJobAge: 86400}
8081
SlurmConfig SlurmConfig `json:"slurmConfig,omitempty"`
8182
}
8283

@@ -86,27 +87,39 @@ type SlurmConfig struct {
8687
//
8788
// +kubebuilder:validation:Optional
8889
// +kubebuilder:default=1228800
89-
DefMemPerNode int32 `json:"defMemPerNode,omitempty"`
90+
DefMemPerNode *int32 `json:"defMemPerNode,omitempty"`
9091
// Default count of CPUs allocated per allocated GPU
9192
//
9293
// +kubebuilder:validation:Optional
9394
// +kubebuilder:default=16
94-
DefCpuPerGPU int32 `json:"defCpuPerGPU,omitempty"`
95+
DefCpuPerGPU *int32 `json:"defCpuPerGPU,omitempty"`
9596
// The time to wait, in seconds, when any job is in the COMPLETING state before any additional jobs are scheduled.
9697
//
9798
// +kubebuilder:validation:Optional
9899
// +kubebuilder:default=5
99-
CompleteWait int32 `json:"completeWait,omitempty"`
100+
CompleteWait *int32 `json:"completeWait,omitempty"`
100101
// Defines specific subsystems which should provide more detailed event logging.
101102
//
102103
// +kubebuilder:validation:Optional
103104
// +kubebuilder:default="Cgroup,CPU_Bind,Gres,JobComp,Priority,Script,SelectType,Steps,TraceJobs"
104105
// +kubebuilder:validation:Pattern="^((Accrue|Agent|AuditRPCs|Backfill|BackfillMap|BurstBuffer|Cgroup|ConMgr|CPU_Bind|CpuFrequency|Data|DBD_Agent|Dependency|Elasticsearch|Energy|Federation|FrontEnd|Gres|Hetjob|Gang|GLOB_SILENCE|JobAccountGather|JobComp|JobContainer|License|Network|NetworkRaw|NodeFeatures|NO_CONF_HASH|Power|Priority|Profile|Protocol|Reservation|Route|Script|SelectType|Steps|Switch|TLS|TraceJobs|Triggers)(,)?)+$"
105-
DebugFlags string `json:"debugFlags,omitempty"`
106+
DebugFlags *string `json:"debugFlags,omitempty"`
107+
// Additional parameters for the task plugin
108+
//
106109
// +kubebuilder:validation:Optional
107110
// +kubebuilder:default="Verbose"
108111
// +kubebuilder:validation:Pattern="^((None|Cores|Sockets|Threads|SlurmdOffSpec|OOMKillStep|Verbose|Autobind)(,)?)+$"
109-
TaskPluginParam string `json:"taskPluginParam,omitempty"`
112+
TaskPluginParam *string `json:"taskPluginParam,omitempty"`
113+
// Maximum number of jobs the controller can keep in memory at one time
114+
//
115+
// +kubebuilder:validation:Optional
116+
// +kubebuilder:default=10000
117+
MaxJobCount *int32 `json:"maxJobCount,omitempty"`
118+
// Minimum age, in seconds, of a completed job before its record is removed from controller memory
119+
//
120+
// +kubebuilder:validation:Optional
121+
// +kubebuilder:default=86400
122+
MinJobAge *int32 `json:"minJobAge,omitempty"`
110123
}
111124

112125
type PartitionConfiguration struct {
@@ -547,33 +560,32 @@ type SlurmdbdConfig struct {
547560

548561
type AccountingSlurmConf struct {
549562
// +kubebuilder:validation:Optional
550-
AccountingStorageTRES string `json:"accountingStorageTRES,omitempty"`
563+
AccountingStorageTRES *string `json:"accountingStorageTRES,omitempty"`
551564
// +kubebuilder:validation:Optional
552-
AccountingStoreFlags string `json:"accountingStoreFlags,omitempty"`
565+
AccountingStoreFlags *string `json:"accountingStoreFlags,omitempty"`
553566
// +kubebuilder:validation:Optional
554-
AcctGatherInterconnectType string `json:"acctGatherInterconnectType,omitempty"`
567+
AcctGatherInterconnectType *string `json:"acctGatherInterconnectType,omitempty"`
555568
// +kubebuilder:validation:Optional
556-
AcctGatherFilesystemType string `json:"acctGatherFilesystemType,omitempty"`
569+
AcctGatherFilesystemType *string `json:"acctGatherFilesystemType,omitempty"`
557570
// +kubebuilder:validation:Optional
558-
AcctGatherProfileType string `json:"acctGatherProfileType,omitempty"`
571+
AcctGatherProfileType *string `json:"acctGatherProfileType,omitempty"`
559572
// +kubebuilder:validation:Optional
560573
// +kubebuilder:validation:Enum="jobacct_gather/linux";"jobacct_gather/cgroup";"jobacct_gather/none"
561-
JobAcctGatherType string `json:"jobAcctGatherType,omitempty"`
574+
JobAcctGatherType *string `json:"jobAcctGatherType,omitempty"`
562575
// +kubebuilder:validation:Optional
563576
// +kubebuilder:default=30
564-
JobAcctGatherFrequency int `json:"jobAcctGatherFrequency,omitempty"`
577+
JobAcctGatherFrequency *int `json:"jobAcctGatherFrequency,omitempty"`
565578
// +kubebuilder:validation:Optional
566579
// +kubebuilder:validation:Enum="NoShared";"UsePss";"OverMemoryKill";"DisableGPUAcct"
567-
JobAcctGatherParams string `json:"jobAcctGatherParams,omitempty"`
580+
JobAcctGatherParams *string `json:"jobAcctGatherParams,omitempty"`
568581
// +kubebuilder:validation:Optional
569582
// +kubebuilder:default=0
570-
PriorityWeightAge int16 `json:"priorityWeightAge,omitempty"`
583+
PriorityWeightAge *int16 `json:"priorityWeightAge,omitempty"`
571584
// +kubebuilder:validation:Optional
572585
// +kubebuilder:default=0
573-
PriorityWeightFairshare int16 `json:"priorityWeightFairshare,omitempty"`
586+
PriorityWeightFairshare *int16 `json:"priorityWeightFairshare,omitempty"`
574587
// +kubebuilder:validation:Optional
575-
// +kubebuilder:default=0
576-
PriorityWeightTRES int16 `json:"priorityWeightTRES,omitempty"`
588+
PriorityWeightTRES *string `json:"priorityWeightTRES,omitempty"`
577589
}
578590

579591
// SlurmNodeController defines the configuration for the Slurm controller node
@@ -645,6 +657,12 @@ type SlurmNodeWorker struct {
645657
// +kubebuilder:validation:Optional
646658
// +kubebuilder:default=false
647659
EnableGDRCopy bool `json:"enableGDRCopy,omitempty"`
660+
661+
// SlurmNodeExtra defines the string that will be set to the "Extra" field of the corresponding Slurm node. It can
662+
// use any environment variables that are available in the slurmd container when it starts.
663+
//
664+
// +kubebuilder:validation:Optional
665+
SlurmNodeExtra string `json:"slurmNodeExtra,omitempty"`
648666
}
649667

650668
// SlurmNodeWorkerVolumes defines the volumes for the Slurm worker node

api/v1/zz_generated.deepcopy.go

Lines changed: 92 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/slurm.nebius.ai_slurmclusters.yaml

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1470,6 +1470,14 @@ spec:
14701470
type: string
14711471
type: object
14721472
slurmConfig:
1473+
default:
1474+
completeWait: 5
1475+
debugFlags: Cgroup,CPU_Bind,Gres,JobComp,Priority,Script,SelectType,Steps,TraceJobs
1476+
defCpuPerGPU: 16
1477+
defMemPerNode: 1228800
1478+
maxJobCount: 10000
1479+
minJobAge: 86400
1480+
taskPluginParam: Verbose
14731481
description: SlurmConfig represents the Slurm configuration in slurm.conf.
14741482
Not all options are supported.
14751483
properties:
@@ -1496,8 +1504,20 @@ spec:
14961504
node in mebibytes.
14971505
format: int32
14981506
type: integer
1507+
maxJobCount:
1508+
default: 10000
1509+
description: Maximum number of jobs the controller can keep in memory at one time
1510+
format: int32
1511+
type: integer
1512+
minJobAge:
1513+
default: 86400
1514+
description: Minimum age, in seconds, of a completed job before its
1515+
record is removed from controller memory
1516+
format: int32
1517+
type: integer
14991518
taskPluginParam:
15001519
default: Verbose
1520+
description: Additional parameters for the task plugin
15011521
pattern: ^((None|Cores|Sockets|Threads|SlurmdOffSpec|OOMKillStep|Verbose|Autobind)(,)?)+$
15021522
type: string
15031523
type: object
@@ -2057,8 +2077,7 @@ spec:
20572077
default: 0
20582078
type: integer
20592079
priorityWeightTRES:
2060-
default: 0
2061-
type: integer
2080+
type: string
20622081
type: object
20632082
slurmdbd:
20642083
description: Slurmdbd represents the Slurm database daemon
@@ -3778,6 +3797,11 @@ spec:
37783797
description: Size defines the number of node instances
37793798
format: int32
37803799
type: integer
3800+
slurmNodeExtra:
3801+
description: |-
3802+
SlurmNodeExtra defines the string that will be set to the "Extra" field of the corresponding Slurm node. It can
3803+
use any environment variables that are available in the slurmd container when it starts.
3804+
type: string
37813805
slurmd:
37823806
description: Slurmd represents the Slurm daemon service configuration
37833807
properties:

config/manager/kustomization.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@ resources:
33
images:
44
- name: controller
55
newName: cr.eu-north1.nebius.cloud/soperator/slurm-operator
6-
newTag: 1.16.0
6+
newTag: 1.16.1

config/manager/manager.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ spec:
8484
value: "false"
8585
- name: SLURM_OPERATOR_WATCH_NAMESPACES
8686
value: "*"
87-
image: controller:1.16.0
87+
image: controller:1.16.1
8888
imagePullPolicy: Always
8989
name: manager
9090
securityContext:

go.mod

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ toolchain go1.23.3
66

77
require (
88
github.com/go-logr/logr v1.4.2
9-
github.com/onsi/ginkgo/v2 v2.22.0
9+
github.com/onsi/ginkgo/v2 v2.22.1
1010
github.com/onsi/gomega v1.36.1
1111
github.com/open-telemetry/opentelemetry-operator v0.103.0
1212
github.com/pkg/errors v0.9.1
@@ -52,7 +52,7 @@ require (
5252
github.com/google/gnostic-models v0.6.8 // indirect
5353
github.com/google/go-cmp v0.6.0 // indirect
5454
github.com/google/gofuzz v1.2.0 // indirect
55-
github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db // indirect
55+
github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad // indirect
5656
github.com/google/uuid v1.6.0 // indirect
5757
github.com/imdario/mergo v0.3.16 // indirect
5858
github.com/josharian/intern v1.0.0 // indirect
@@ -75,13 +75,13 @@ require (
7575
go.opentelemetry.io/otel/sdk/metric v1.28.0 // indirect
7676
go.opentelemetry.io/otel/trace v1.28.0 // indirect
7777
go.uber.org/multierr v1.11.0 // indirect
78-
golang.org/x/net v0.30.0 // indirect
78+
golang.org/x/net v0.32.0 // indirect
7979
golang.org/x/oauth2 v0.22.0 // indirect
8080
golang.org/x/sys v0.28.0 // indirect
8181
golang.org/x/term v0.27.0 // indirect
8282
golang.org/x/text v0.21.0 // indirect
8383
golang.org/x/time v0.5.0 // indirect
84-
golang.org/x/tools v0.26.0 // indirect
84+
golang.org/x/tools v0.28.0 // indirect
8585
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
8686
google.golang.org/protobuf v1.35.1 // indirect
8787
gopkg.in/inf.v0 v0.9.1 // indirect

go.sum

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@ github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN
4545
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
4646
github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
4747
github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
48-
github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo=
49-
github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144=
48+
github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad h1:a6HEuzUHeKH6hwfN/ZoQgRgVIWFJljSWa/zetS2WTvg=
49+
github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144=
5050
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
5151
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
5252
github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4=
@@ -72,8 +72,8 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G
7272
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
7373
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
7474
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
75-
github.com/onsi/ginkgo/v2 v2.22.0 h1:Yed107/8DjTr0lKCNt7Dn8yQ6ybuDRQoMGrNFKzMfHg=
76-
github.com/onsi/ginkgo/v2 v2.22.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo=
75+
github.com/onsi/ginkgo/v2 v2.22.1 h1:QW7tbJAUDyVDVOM5dFa7qaybo+CRfR7bemlQUN6Z8aM=
76+
github.com/onsi/ginkgo/v2 v2.22.1/go.mod h1:S6aTpoRsSq2cZOd+pssHAlKW/Q/jZt6cPrPlnj4a1xM=
7777
github.com/onsi/gomega v1.36.1 h1:bJDPBO7ibjxcbHMgSCoo4Yj18UWbKDlLwX1x9sybDcw=
7878
github.com/onsi/gomega v1.36.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog=
7979
github.com/open-telemetry/opentelemetry-operator v0.103.0 h1:L0REMuJSMZjqCw7p7fWMn19XkiIULMr3NnHdPLryMQs=
@@ -144,8 +144,8 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn
144144
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
145145
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
146146
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
147-
golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4=
148-
golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU=
147+
golang.org/x/net v0.32.0 h1:ZqPmj8Kzc+Y6e0+skZsuACbx+wzMgo5MQsJh9Qd6aYI=
148+
golang.org/x/net v0.32.0/go.mod h1:CwU0IoeOlnQQWJ6ioyFrfRuomB8GKF6KbYXZVyeXNfs=
149149
golang.org/x/oauth2 v0.22.0 h1:BzDx2FehcG7jJwgWLELCdmLuxk2i+x9UDpSiss2u0ZA=
150150
golang.org/x/oauth2 v0.22.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
151151
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -168,8 +168,8 @@ golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGm
168168
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
169169
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
170170
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
171-
golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ=
172-
golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0=
171+
golang.org/x/tools v0.28.0 h1:WuB6qZ4RPCQo5aP3WdKZS7i595EdWqWR8vqJTlwTVK8=
172+
golang.org/x/tools v0.28.0/go.mod h1:dcIOrVd3mfQKTgrDVQHqCPMWy6lnhfhtX3hLXYVLfRw=
173173
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
174174
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
175175
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

helm/slurm-cluster-storage/Chart.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@ apiVersion: v2
22
name: helm-slurm-cluster-storage
33
description: A Helm chart for Kubernetes
44
type: application
5-
version: "1.16.0"
6-
appVersion: "1.16.0"
5+
version: "1.16.1"
6+
appVersion: "1.16.1"

0 commit comments

Comments
 (0)