Skip to content

Commit f0b8b84

Browse files
authored
Merge pull request #167 from nebius/restd
Slurm REST API
2 parents 09a8bb6 + 22ad9b7 commit f0b8b84

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+961
-68
lines changed

.github/workflows/one_job.yml

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,6 @@ jobs:
5252
with:
5353
egress-policy: audit
5454

55-
- name: Debug vars
56-
run: echo "UNSTABLE - is ${{ needs.pre-build.outputs.unstable }}"
57-
5855
- name: Checkout repository
5956
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
6057

@@ -63,6 +60,11 @@ jobs:
6360
with:
6461
go-version-file: 'go.mod'
6562

63+
- name: Debug vars
64+
run: |
65+
echo "UNSTABLE - is ${{ needs.pre-build.outputs.unstable }}"
66+
make get-version UNSTABLE=${{ needs.pre-build.outputs.unstable }}
67+
6668
- name: Check if version synced
6769
run: make test-version-sync
6870

@@ -108,6 +110,9 @@ jobs:
108110
109111
make docker-build UNSTABLE="${UNSTABLE}" IMAGE_NAME=exporter DOCKERFILE=exporter/exporter.dockerfile
110112
make docker-push UNSTABLE="${UNSTABLE}" IMAGE_NAME=exporter
113+
114+
make docker-build UNSTABLE="${UNSTABLE}" IMAGE_NAME=slurmrestd DOCKERFILE=restd/slurmrestd.dockerfile
115+
make docker-push UNSTABLE="${UNSTABLE}" IMAGE_NAME=slurmrestd
111116
112117
echo "Common images were built"
113118

Makefile

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,17 +43,12 @@ VALUES_VERSION = $(shell $(YQ) '.images.slurmctld' helm/slurm-cluster/values
4343
OPERATOR_IMAGE_TAG = $(VERSION)
4444

4545
ifeq ($(shell uname), Darwin)
46-
SHA_CMD = shasum -a 256
4746
SED_COMMAND = sed -i '' -e
48-
USER_MAIL = $(shell git config user.email)
4947
else
50-
SHA_CMD = sha256sum
5148
SED_COMMAND = sed -i -e
52-
USER_MAIL = $(shell git log -1 --pretty=format:'%ae')
5349
endif
5450
ifeq ($(UNSTABLE), true)
55-
USER_MAIL = $(shell git log -1 --pretty=format:'%ae')
56-
SHORT_SHA = $(shell echo -n "$(USER_MAIL)-$(VERSION)" | $(SHA_CMD) | cut -c1-8)
51+
SHORT_SHA = $(shell git rev-parse --short=8 HEAD)
5752
OPERATOR_IMAGE_TAG = $(VERSION)-$(SHORT_SHA)
5853
IMAGE_VERSION = $(VERSION)-$(UBUNTU_VERSION)-slurm$(SLURM_VERSION)-$(SHORT_SHA)
5954
IMAGE_REPO = $(NEBIUS_REPO)-unstable
@@ -177,6 +172,7 @@ sync-version: yq ## Sync versions from file
177172
@echo 'Syncing helm/slurm-cluster/values.yaml'
178173
@$(YQ) -i ".images.ncclBenchmark = \"$(IMAGE_REPO)/nccl_benchmark:$(IMAGE_VERSION)\"" "helm/slurm-cluster/values.yaml"
179174
@$(YQ) -i ".images.slurmctld = \"$(IMAGE_REPO)/controller_slurmctld:$(IMAGE_VERSION)\"" "helm/slurm-cluster/values.yaml"
175+
@$(YQ) -i ".images.slurmrestd = \"$(IMAGE_REPO)/slurmrestd:$(IMAGE_VERSION)\"" "helm/slurm-cluster/values.yaml"
180176
@$(YQ) -i ".images.slurmdbd = \"$(IMAGE_REPO)/controller_slurmdbd:$(IMAGE_VERSION)\"" "helm/slurm-cluster/values.yaml"
181177
@$(YQ) -i ".images.slurmd = \"$(IMAGE_REPO)/worker_slurmd:$(IMAGE_VERSION)\"" "helm/slurm-cluster/values.yaml"
182178
@$(YQ) -i ".images.sshd = \"$(IMAGE_REPO)/login_sshd:$(IMAGE_VERSION)\"" "helm/slurm-cluster/values.yaml"

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.14.17
1+
1.15.1

api/v1/slurmcluster_types.go

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,24 @@ type SlurmNodes struct {
330330
// TODO: Making exporter optional requires SlurmNode.K8sNodeFilterName to be optional.
331331
// +kubebuilder:validation:Required
332332
Exporter SlurmExporter `json:"exporter"`
333+
334+
Rest SlurmRest `json:"rest"`
335+
}
336+
337+
// SlurmRest represents the Slurm REST API configuration
338+
type SlurmRest struct {
339+
SlurmNode `json:",inline"`
340+
341+
// Enabled defines whether the SlurmRest is enabled
342+
//
343+
// +kubebuilder:validation:Optional
344+
// +kubebuilder:default=false
345+
Enabled bool `json:"enabled,omitempty"`
346+
347+
// SlurmRestNode represents the Slurm REST API daemon configuration
348+
//
349+
// +kubebuilder:validation:Optional
350+
SlurmRestNode NodeContainer `json:"rest,omitempty"`
333351
}
334352

335353
// SlurmNodeAccounting represents the Slurm accounting configuration
@@ -680,7 +698,7 @@ type ExporterContainer struct {
680698
PodTemplateNameRef *string `json:"podTemplateNameRef,omitempty"`
681699
}
682700

683-
// SlurmExporterVolumes define the volumes for the Slurm controller node
701+
// SlurmExporterVolumes define the volumes for the Slurm exporter node
684702
type SlurmExporterVolumes struct {
685703
// Jail represents the jail data volume configuration
686704
//

api/v1/zz_generated.deepcopy.go

Lines changed: 18 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/slurm.nebius.ai_slurmclusters.yaml

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9974,6 +9974,68 @@ spec:
99749974
- sshdServiceType
99759975
- volumes
99769976
type: object
9977+
rest:
9978+
description: SlurmRest represents the Slurm REST API configuration
9979+
properties:
9980+
enabled:
9981+
default: false
9982+
description: Enabled defines whether the SlurmRest is enabled
9983+
type: boolean
9984+
k8sNodeFilterName:
9985+
description: |-
9986+
K8sNodeFilterName defines the Kubernetes node filter name associated with the Slurm node.
9987+
Must correspond to the name of one of [K8sNodeFilter]
9988+
type: string
9989+
rest:
9990+
description: SlurmRestNode represents the Slurm REST API daemon
9991+
configuration
9992+
properties:
9993+
appArmorProfile:
9994+
default: unconfined
9995+
description: AppArmorProfile defines the AppArmor profile
9996+
for the Slurm worker node
9997+
type: string
9998+
image:
9999+
description: Image defines the container image
10000+
type: string
10001+
imagePullPolicy:
10002+
default: IfNotPresent
10003+
description: ImagePullPolicy defines the image pull policy
10004+
enum:
10005+
- Always
10006+
- Never
10007+
- IfNotPresent
10008+
type: string
10009+
port:
10010+
description: Port defines the port the container exposes
10011+
format: int32
10012+
type: integer
10013+
resources:
10014+
additionalProperties:
10015+
anyOf:
10016+
- type: integer
10017+
- type: string
10018+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
10019+
x-kubernetes-int-or-string: true
10020+
description: Resources defines the [corev1.ResourceRequirements]
10021+
for the container
10022+
type: object
10023+
securityLimitsConfig:
10024+
description: |-
10025+
SecurityLimitsConfig represents multiline limits.conf
10026+
format of a string should be: '* <soft|hard> <item> <value>'
10027+
example: '* soft nofile 1024'
10028+
type: string
10029+
required:
10030+
- image
10031+
type: object
10032+
size:
10033+
description: Size defines the number of node instances
10034+
format: int32
10035+
type: integer
10036+
required:
10037+
- k8sNodeFilterName
10038+
type: object
997710039
worker:
997810040
description: Worker represents the Slurm worker node configuration
997910041
properties:
@@ -10554,6 +10616,7 @@ spec:
1055410616
- controller
1055510617
- exporter
1055610618
- login
10619+
- rest
1055710620
- worker
1055810621
type: object
1055910622
telemetry:

config/manager/kustomization.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@ resources:
33
images:
44
- name: controller
55
newName: cr.eu-north1.nebius.cloud/soperator/slurm-operator
6-
newTag: 1.14.17
6+
newTag: 1.15.1

config/manager/manager.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ spec:
8282
value: "false"
8383
- name: SLURM_OPERATOR_WATCH_NAMESPACES
8484
value: "*"
85-
image: controller:1.14.17
85+
image: controller:1.15.1
8686
imagePullPolicy: Always
8787
name: manager
8888
securityContext:

helm/slurm-cluster-storage/Chart.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@ apiVersion: v2
22
name: helm-slurm-cluster-storage
33
description: A Helm chart for Kubernetes
44
type: application
5-
version: "1.14.17"
6-
appVersion: "1.14.17"
5+
version: "1.15.1"
6+
appVersion: "1.15.1"

helm/slurm-cluster/Chart.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@ apiVersion: v2
22
name: helm-slurm-cluster
33
description: A Helm chart for Kubernetes
44
type: application
5-
version: "1.14.17"
6-
appVersion: "1.14.17"
5+
version: "1.15.1"
6+
appVersion: "1.15.1"

0 commit comments

Comments
 (0)