Skip to content

Commit 7067a47

Browse files
Merge branch 'kubeflow:master' into test-release
2 parents e88a5c2 + abf42d5 commit 7067a47

35 files changed

+314
-2545
lines changed
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
name: Code Quality Checks
2+
3+
on:
4+
push:
5+
pull_request:
6+
7+
jobs:
8+
code-quality:
9+
runs-on: ubuntu-latest
10+
11+
steps:
12+
- name: Checkout code
13+
uses: actions/checkout@v6
14+
with:
15+
fetch-depth: 0
16+
17+
- name: Setup Go
18+
uses: actions/setup-go@v5
19+
with:
20+
go-version-file: go.mod
21+
22+
- name: Check KubeLinter
23+
run: make lint-manifests
24+
25+
- name: Check Helm Charts
26+
run: make helm-lint TARGET_BRANCH=${{ github.event.pull_request.base.ref || github.event.repository.default_branch || 'master' }}

.github/workflows/test-helm.yaml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
name: Unit and Integration Test - Helm
2+
3+
on:
4+
- pull_request
5+
- push
6+
7+
permissions:
8+
contents: read
9+
10+
jobs:
11+
test:
12+
runs-on: ubuntu-latest
13+
14+
steps:
15+
- name: Checkout code
16+
uses: actions/checkout@v6
17+
18+
- name: Setup Go
19+
uses: actions/setup-go@v6
20+
with:
21+
go-version-file: go.mod
22+
23+
- name: Run Helm Unit Tests
24+
run: make helm-unittest

.github/workflows/trivy-scan.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ jobs:
1313
uses: actions/checkout@v6
1414

1515
- name: Run Trivy vulnerability scanner in repo mode
16-
uses: aquasecurity/trivy-action@0.33.1
16+
uses: aquasecurity/trivy-action@0.34.0
1717
with:
1818
scan-type: "fs"
1919
ignore-unfixed: true

.kube-linter.yaml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# KubeLinter configuration file
2+
# For more information, see https://docs.kubelinter.io/#/configuring-kubelinter
3+
4+
# checks section configures which checks to run.
5+
checks:
6+
# doNotIncludeBuiltIn: when true, none of the default (built-in) checks are included.
7+
# If this is false, all default checks are included,
8+
# and the "include" and "exclude" lists are applied on top of them.
9+
doNotIncludeBuiltIn: false
10+
11+
# include is a list of check names to include.
12+
# include:
13+
# - "check-name-1"
14+
15+
# exclude is a list of check names to exclude.
16+
exclude:
17+
- "unset-cpu-requirements"
18+
- "unset-memory-requirements"
19+
- "mismatching-selector"
20+
- "no-read-only-root-fs"
21+
- "run-as-non-root"
22+
- "no-anti-affinity"
23+
- "non-existent-service-account" # Often false positive in kustomize
24+
- "latest-tag"

Makefile

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ HELM_UNITTEST_VERSION ?= 0.5.1
2828
HELM_CHART_TESTING_VERSION ?= v3.12.0
2929
HELM_DOCS_VERSION ?= v1.14.2
3030
YQ_VERSION ?= v4.45.1
31+
KUBE_LINTER_VERSION ?= v0.7.1
3132

3233
# Container runtime (docker or podman)
3334
CONTAINER_RUNTIME ?=
@@ -42,6 +43,7 @@ HELM_DOCS ?= $(LOCALBIN)/helm-docs
4243
YQ ?= $(LOCALBIN)/yq
4344
GOLANGCI_LINT ?= $(LOCALBIN)/golangci-lint
4445
GOLANGCI_LINT_KAL ?= $(LOCALBIN)/golangci-lint-kube-api-linter
46+
KUBE_LINTER ?= $(LOCALBIN)/kube-linter
4547

4648
##@ General
4749

@@ -106,6 +108,15 @@ helm-docs-plugin: ## Download helm-docs plugin locally if required.
106108
yq: # Download yq locally if required.
107109
GOBIN=$(LOCALBIN) go install github.com/mikefarah/yq/v4@$(YQ_VERSION)
108110

111+
.PHONY: kube-linter
112+
kube-linter: ## Download kube-linter locally if required.
113+
GOBIN=$(LOCALBIN) go install golang.stackrox.io/kube-linter/cmd/kube-linter@$(KUBE_LINTER_VERSION)
114+
115+
.PHONY: lint-manifests
116+
lint-manifests: kube-linter ## Run kube-linter on manifests and helm charts.
117+
$(KUBE_LINTER) lint manifests/base --config .kube-linter.yaml
118+
$(KUBE_LINTER) lint charts/kubeflow-trainer --config .kube-linter.yaml
119+
109120
# Download external CRDs for Go integration testings.
110121
EXTERNAL_CRDS_DIR ?= $(PROJECT_DIR)/manifests/external-crds
111122

@@ -217,13 +228,15 @@ test-e2e-notebook: ## Run Jupyter Notebook with Papermill.
217228

218229
##@ Helm
219230

231+
TARGET_BRANCH ?= master
232+
220233
.PHONY: helm-unittest
221234
helm-unittest: helm-unittest-plugin ## Run Helm chart unittests.
222235
$(HELM) unittest $(TRAINER_CHART_DIR) --strict --file "tests/**/*_test.yaml"
223236

224237
.PHONY: helm-lint
225238
helm-lint: ## Run Helm chart lint test.
226-
docker run --rm --workdir /workspace --user "$(shell id -u):$(shell id -g)" --volume "$$(pwd):/workspace" quay.io/helmpack/chart-testing:$(HELM_CHART_TESTING_VERSION) ct lint --target-branch master --validate-maintainers=false
239+
docker run --rm --workdir /workspace --user "$(shell id -u):$(shell id -g)" --volume "$$(pwd):/workspace" quay.io/helmpack/chart-testing:$(HELM_CHART_TESTING_VERSION) ct lint --target-branch $(TARGET_BRANCH) --validate-maintainers=false --check-version-increment=false
227240

228241
.PHONY: helm-docs
229242
helm-docs: helm-docs-plugin ## Generates markdown documentation for helm charts from requirements and values files.

api/openapi-spec/swagger.json

Lines changed: 0 additions & 42 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/python_api/kubeflow_trainer_api/models/__init__.py

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/python_api/kubeflow_trainer_api/models/trainer_v1alpha1_torch_ml_policy_source.py

Lines changed: 1 addition & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

charts/kubeflow-trainer/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ dataCache:
7070
cacheImage:
7171
tag: "v2.0.0"
7272
runtimes:
73-
torchDistributed:
73+
torchDistributedWithCache:
7474
enabled: true
7575
```
7676
@@ -133,8 +133,8 @@ See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall) for command docum
133133
| dataCache.cacheImage.registry | string | `"ghcr.io"` | Data cache image registry |
134134
| dataCache.cacheImage.repository | string | `"kubeflow/trainer/data-cache"` | Data cache image repository |
135135
| dataCache.cacheImage.tag | string | `""` | Data cache image tag. Defaults to chart version if empty. |
136-
| dataCache.runtimes.torchDistributed | object | `{"enabled":false}` | PyTorch distributed training with data cache support |
137-
| dataCache.runtimes.torchDistributed.enabled | bool | `false` | Enable deployment of torch-distributed-with-cache runtime |
136+
| dataCache.runtimes.torchDistributedWithCache | object | `{"enabled":false}` | PyTorch distributed training with data cache support |
137+
| dataCache.runtimes.torchDistributedWithCache.enabled | bool | `false` | Enable deployment of torch-distributed-with-cache runtime |
138138
| runtimes | object | `{"deepspeedDistributed":{"enabled":false,"image":{"registry":"ghcr.io","repository":"kubeflow/trainer/deepspeed-runtime","tag":""}},"defaultEnabled":false,"jaxDistributed":{"enabled":false},"mlxDistributed":{"enabled":false,"image":{"registry":"ghcr.io","repository":"kubeflow/trainer/mlx-runtime","tag":""}},"torchDistributed":{"enabled":false},"torchtuneDistributed":{"image":{"registry":"ghcr.io","repository":"kubeflow/trainer/torchtune-trainer","tag":""},"llama3_2_1B":{"enabled":false},"llama3_2_3B":{"enabled":false},"qwen2_5_1_5B":{"enabled":false}}}` | ClusterTrainingRuntimes configuration. These are optional runtime templates that can be deployed with the Helm chart. Each runtime provides a blueprint for different ML frameworks and configurations. |
139139
| runtimes.defaultEnabled | bool | `false` | Enable all default runtimes (torch, deepspeed, mlx, jax, torchtune) when set to true. Individual runtime settings will be ignored if this is enabled. |
140140
| runtimes.torchDistributed | object | `{"enabled":false}` | PyTorch distributed training runtime (no custom images required) |

charts/kubeflow-trainer/README.md.gotmpl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ dataCache:
8888
cacheImage:
8989
tag: "v2.0.0"
9090
runtimes:
91-
torchDistributed:
91+
torchDistributedWithCache:
9292
enabled: true
9393
```
9494

0 commit comments

Comments
 (0)