Skip to content

Commit 65e3d17

Browse files
authored
chore: Jan backport (01/12/2026) (#1246)
2 parents 5de2741 + 94586a4 commit 65e3d17

File tree

95 files changed

+9522
-1612
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

95 files changed

+9522
-1612
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ jobs:
157157

158158
- name: Upload logs
159159
if: always()
160-
uses: actions/upload-artifact@v5
160+
uses: actions/upload-artifact@v6
161161
with:
162162
name: e2e-logs-${{ matrix.customized-settings }}
163163
path: test/e2e/logs-${{ matrix.customized-settings }}/

.github/workflows/codespell.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ jobs:
1212
runs-on: ubuntu-latest
1313
steps:
1414
- name: Harden Runner
15-
uses: step-security/harden-runner@df199fb7be9f65074067a9eb93f12bb4c5547cf2 # v2.13.3
15+
uses: step-security/harden-runner@20cf305ff2072d973412fa9b1e3a4f227bda3c76 # v2.14.0
1616
with:
1717
egress-policy: audit
1818

Makefile

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ $(PROTOC):
141141
unzip $(TOOLS_BIN_DIR)/protoc.zip -d $(TOOLS_BIN_DIR)/protoc_tmp && mv $(TOOLS_BIN_DIR)/protoc_tmp/bin/protoc $(PROTOC) && rm -rf $(TOOLS_BIN_DIR)/protoc.zip $(TOOLS_BIN_DIR)/protoc_tmp
142142

143143
.PHONY: help
144-
help: ## Display this help.
144+
help: ## Display this help
145145
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
146146

147147

@@ -150,7 +150,7 @@ help: ## Display this help.
150150
## --------------------------------------
151151

152152
.PHONY: lint
153-
lint: $(GOLANGCI_LINT)
153+
lint: $(GOLANGCI_LINT) ## Run fast linting
154154
$(GOLANGCI_LINT) run -v
155155

156156
.PHONY: lint-full
@@ -161,36 +161,36 @@ lint-full: $(GOLANGCI_LINT) ## Run slower linters to detect possible issues
161161
## Development
162162
## --------------------------------------
163163

164-
staticcheck: $(STATICCHECK)
164+
staticcheck: $(STATICCHECK) ## Run static analysis
165165
$(STATICCHECK) ./...
166166

167167
.PHONY: fmt
168-
fmt: $(GOIMPORTS) ## Run go fmt against code.
168+
fmt: $(GOIMPORTS) ## Run go fmt against code
169169
go fmt ./...
170170
$(GOIMPORTS) -local go.goms.io/fleet -w $$(go list -f {{.Dir}} ./...)
171171

172172
.PHONY: vet
173-
vet: ## Run go vet against code.
173+
vet: ## Run go vet against code
174174
go vet ./...
175175

176176
## --------------------------------------
177177
## test
178178
## --------------------------------------
179179

180180
.PHONY: test
181-
test: manifests generate fmt vet local-unit-test integration-test ## Run tests.
181+
test: manifests generate fmt vet local-unit-test integration-test ## Run unit tests and integration tests
182182

183183
##
184184
# Set up the timeout parameters, as some of the tests (rollout controller) have run times that exceed the default 10-minute mark.
185185
# TO-DO (chenyu1): enable parallelization for single package integration tests.
186186
.PHONY: local-unit-test
187-
local-unit-test: $(ENVTEST) ## Run tests.
187+
local-unit-test: $(ENVTEST) ## Run unit tests
188188
export CGO_ENABLED=1 && \
189189
export KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" && \
190-
go test `go list ./pkg/... ./cmd/...` -race -coverpkg=./... -coverprofile=ut-coverage.xml -covermode=atomic -v -timeout=20m
190+
go test `go list ./pkg/... ./cmd/...` -race -coverpkg=./... -coverprofile=ut-coverage.xml -covermode=atomic -v -timeout=30m
191191

192192
.PHONY: integration-test
193-
integration-test: $(ENVTEST) ## Run tests.
193+
integration-test: $(ENVTEST) ## Run integration tests
194194
export CGO_ENABLED=1 && \
195195
export KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" && \
196196
ginkgo -v -p --race --cover --coverpkg=./pkg/scheduler/... ./test/scheduler && \
@@ -202,14 +202,14 @@ integration-test: $(ENVTEST) ## Run tests.
202202
LABEL_FILTER ?= !custom
203203

204204
.PHONY: e2e-tests
205-
e2e-tests: setup-clusters
205+
e2e-tests: setup-clusters ## Run E2E tests
206206
cd ./test/e2e && ginkgo --timeout=70m --label-filter="$(LABEL_FILTER)" -v -p .
207207

208-
e2e-tests-custom: setup-clusters
208+
e2e-tests-custom: setup-clusters ## Run custom E2E tests with labels
209209
cd ./test/e2e && ginkgo --label-filter="custom" -v -p .
210210

211211
.PHONY: setup-clusters
212-
setup-clusters:
212+
setup-clusters: ## Set up Kind clusters for E2E testing
213213
cd ./test/e2e && chmod +x ./setup.sh && ./setup.sh $(MEMBER_CLUSTER_COUNT)
214214

215215
.PHONY: collect-e2e-logs
@@ -218,7 +218,7 @@ collect-e2e-logs: ## Collect logs from hub and member agent pods after e2e tests
218218

219219
## reviewable
220220
.PHONY: reviewable
221-
reviewable: fmt vet lint staticcheck
221+
reviewable: fmt vet lint staticcheck ## Run all quality checks before PR
222222
go mod tidy
223223

224224
## --------------------------------------
@@ -230,7 +230,7 @@ CRD_OPTIONS ?= "crd"
230230

231231
# Generate manifests e.g. CRD, RBAC etc.
232232
.PHONY: manifests
233-
manifests: $(CONTROLLER_GEN)
233+
manifests: $(CONTROLLER_GEN) ## Generate CRDs and manifests
234234
$(CONTROLLER_GEN) \
235235
$(CRD_OPTIONS) rbac:roleName=manager-role webhook paths="./apis/..." output:crd:artifacts:config=config/crd/bases
236236

@@ -243,7 +243,7 @@ protos: $(PROTOC_GEN_GO) $(PROTOC_GEN_GO_GRPC) $(PROTOC_GEN_GRPC_GATEWAY) $(PROT
243243
apis/protos/azure/compute/v1/vmsizerecommender.proto
244244

245245
# Generate code
246-
generate: $(CONTROLLER_GEN) protos
246+
generate: $(CONTROLLER_GEN) protos ## Generate deep copy methods
247247
$(CONTROLLER_GEN) \
248248
object:headerFile="hack/boilerplate.go.txt" paths="./..."
249249

@@ -252,17 +252,17 @@ generate: $(CONTROLLER_GEN) protos
252252
## --------------------------------------
253253

254254
.PHONY: build
255-
build: generate fmt vet ## Build agent binaries.
255+
build: generate fmt vet ## Build agent binaries
256256
go build -o bin/hubagent cmd/hubagent/main.go
257257
go build -o bin/memberagent cmd/memberagent/main.go
258258
go build -o bin/crdinstaller cmd/crdinstaller/main.go
259259

260260
.PHONY: run-hubagent
261-
run-hubagent: manifests generate fmt vet ## Run a controllers from your host.
261+
run-hubagent: manifests generate fmt vet ## Run hub-agent from your host
262262
go run ./cmd/hubagent/main.go
263263

264264
.PHONY: run-memberagent
265-
run-memberagent: manifests generate fmt vet ## Run a controllers from your host.
265+
run-memberagent: manifests generate fmt vet ## Run member-agent from your host
266266
go run ./cmd/memberagent/main.go
267267

268268
.PHONY: run-crdinstaller
@@ -279,7 +279,7 @@ QEMU_VERSION ?= 7.2.0-1
279279
BUILDKIT_VERSION ?= v0.18.1
280280

281281
.PHONY: push
282-
push:
282+
push: ## Build and push all Docker images
283283
$(MAKE) OUTPUT_TYPE="type=registry" docker-build-hub-agent docker-build-member-agent docker-build-refresh-token docker-build-crd-installer
284284

285285
# By default, docker buildx create will pull image moby/buildkit:buildx-stable-1 and hit the too many requests error
@@ -289,7 +289,7 @@ push:
289289
# we keep the original setup if the build target is x86_64 platforms (default) for compatibility reasons, but will switch to
290290
# a more general setup for non-x86_64 hosts.
291291
#
292-
# On some systems the emulation setup might not work at all (e.g., macOS on Apple Silicon -> Rosetta 2 will be used
292+
# On some systems the emulation setup might not work at all (e.g., macOS on Apple Silicon -> Rosetta 2 will be used
293293
# by Docker Desktop as the default emulation option for AMD64 on ARM64 container compatibility).
294294
.PHONY: docker-buildx-builder
295295
# Note (chenyu1): the step below sets up emulation for building/running non-native binaries on the host. The original
@@ -313,7 +313,7 @@ docker-buildx-builder:
313313
fi
314314

315315
.PHONY: docker-build-hub-agent
316-
docker-build-hub-agent: docker-buildx-builder
316+
docker-build-hub-agent: docker-buildx-builder ## Build hub-agent image
317317
docker buildx build \
318318
--file docker/$(HUB_AGENT_IMAGE_NAME).Dockerfile \
319319
--output=$(OUTPUT_TYPE) \
@@ -325,7 +325,7 @@ docker-build-hub-agent: docker-buildx-builder
325325
--build-arg GOOS=$(TARGET_OS) .
326326

327327
.PHONY: docker-build-member-agent
328-
docker-build-member-agent: docker-buildx-builder
328+
docker-build-member-agent: docker-buildx-builder ## Build member-agent image
329329
docker buildx build \
330330
--file docker/$(MEMBER_AGENT_IMAGE_NAME).Dockerfile \
331331
--output=$(OUTPUT_TYPE) \
@@ -337,7 +337,7 @@ docker-build-member-agent: docker-buildx-builder
337337
--build-arg GOOS=$(TARGET_OS) .
338338

339339
.PHONY: docker-build-refresh-token
340-
docker-build-refresh-token: docker-buildx-builder
340+
docker-build-refresh-token: docker-buildx-builder ## Build refresh-token image
341341
docker buildx build \
342342
--file docker/$(REFRESH_TOKEN_IMAGE_NAME).Dockerfile \
343343
--output=$(OUTPUT_TYPE) \
@@ -366,7 +366,7 @@ helm-package-arc-member-cluster-agents:
366366
envsubst < charts/member-agent-arc/values.yaml > charts/member-agent-arc/values.yaml.tmp && \
367367
mv charts/member-agent-arc/values.yaml.tmp charts/member-agent-arc/values.yaml && \
368368
helm package charts/member-agent-arc/ --version $(ARC_MEMBER_AGENT_HELMCHART_VERSION)
369-
369+
370370
helm push $(ARC_MEMBER_AGENT_HELMCHART_NAME)-$(ARC_MEMBER_AGENT_HELMCHART_VERSION).tgz oci://$(REGISTRY)
371371

372372
## -----------------------------------
@@ -379,5 +379,5 @@ clean-bin: ## Remove all generated binaries
379379
rm -rf ./bin
380380

381381
.PHONY: clean-e2e-tests
382-
clean-e2e-tests:
382+
clean-e2e-tests: ## Clean up E2E test clusters
383383
cd ./test/e2e && chmod +x ./stop.sh && ./stop.sh $(MEMBER_CLUSTER_COUNT)

apis/placement/v1beta1/clusterresourceplacement_types.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -524,9 +524,9 @@ const (
524524
type RolloutStrategy struct {
525525
// Type of rollout. The only supported types are "RollingUpdate" and "External".
526526
// Default is "RollingUpdate".
527-
// +kubebuilder:validation:Optional
528527
// +kubebuilder:default=RollingUpdate
529528
// +kubebuilder:validation:Enum=RollingUpdate;External
529+
// +kubebuilder:validation:XValidation:rule="!(self != 'External' && oldSelf == 'External')",message="cannot change rollout strategy type from 'External' to other types"
530530
Type RolloutStrategyType `json:"type,omitempty"`
531531

532532
// Rolling update config params. Present only if RolloutStrategyType = RollingUpdate.

apis/placement/v1beta1/stageupdate_types.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ type UpdateRunObjList interface {
9696
// +kubebuilder:printcolumn:JSONPath=`.status.conditions[?(@.type=="Succeeded")].status`,name="Succeeded",type=string
9797
// +kubebuilder:printcolumn:JSONPath=`.metadata.creationTimestamp`,name="Age",type=date
9898
// +kubebuilder:printcolumn:JSONPath=`.spec.stagedRolloutStrategyName`,name="Strategy",priority=1,type=string
99-
// +kubebuilder:validation:XValidation:rule="size(self.metadata.name) < 128",message="metadata.name max length is 127"
99+
// +kubebuilder:validation:XValidation:rule="size(self.metadata.name) < 64",message="metadata.name max length is 63"
100100

101101
// ClusterStagedUpdateRun represents a stage by stage update process that applies ClusterResourcePlacement
102102
// selected resources to specified clusters.
@@ -427,7 +427,7 @@ const (
427427
// Its condition status can be one of the following:
428428
// - "True": The staged update run is making progress.
429429
// - "False": The staged update run is waiting/paused/abandoned.
430-
// - "Unknown" means it is unknown.
430+
// - "Unknown": The staged update run is in a transitioning state.
431431
StagedUpdateRunConditionProgressing StagedUpdateRunConditionType = "Progressing"
432432

433433
// StagedUpdateRunConditionSucceeded indicates whether the staged update run is completed successfully.
@@ -489,7 +489,8 @@ const (
489489
// StageUpdatingConditionProgressing indicates whether the stage updating is making progress.
490490
// Its condition status can be one of the following:
491491
// - "True": The stage updating is making progress.
492-
// - "False": The stage updating is waiting/pausing.
492+
// - "False": The stage updating is waiting.
493+
// - "Unknown": The stage updating is in a transitioning state.
493494
StageUpdatingConditionProgressing StageUpdatingConditionType = "Progressing"
494495

495496
// StageUpdatingConditionSucceeded indicates whether the stage updating is completed successfully.
@@ -790,7 +791,7 @@ func (c *ClusterApprovalRequestList) GetApprovalRequestObjs() []ApprovalRequestO
790791
// +kubebuilder:printcolumn:JSONPath=`.status.conditions[?(@.type=="Succeeded")].status`,name="Succeeded",type=string
791792
// +kubebuilder:printcolumn:JSONPath=`.metadata.creationTimestamp`,name="Age",type=date
792793
// +kubebuilder:printcolumn:JSONPath=`.spec.stagedRolloutStrategyName`,name="Strategy",priority=1,type=string
793-
// +kubebuilder:validation:XValidation:rule="size(self.metadata.name) < 128",message="metadata.name max length is 127"
794+
// +kubebuilder:validation:XValidation:rule="size(self.metadata.name) < 64",message="metadata.name max length is 63"
794795

795796
// StagedUpdateRun represents a stage by stage update process that applies ResourcePlacement
796797
// selected resources to specified clusters.

cmd/hubagent/main.go

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ import (
4646
"go.goms.io/fleet/cmd/hubagent/options"
4747
"go.goms.io/fleet/cmd/hubagent/workload"
4848
mcv1beta1 "go.goms.io/fleet/pkg/controllers/membercluster/v1beta1"
49+
readiness "go.goms.io/fleet/pkg/utils/informer/readiness"
50+
"go.goms.io/fleet/pkg/utils/validator"
4951
"go.goms.io/fleet/pkg/webhook"
5052
"go.goms.io/fleet/pkg/webhook/managedresource"
5153
// +kubebuilder:scaffold:imports
@@ -166,7 +168,17 @@ func main() {
166168

167169
ctx := ctrl.SetupSignalHandler()
168170
if err := workload.SetupControllers(ctx, &wg, mgr, config, opts); err != nil {
169-
klog.ErrorS(err, "unable to set up ready check")
171+
klog.ErrorS(err, "unable to set up controllers")
172+
exitWithErrorFunc()
173+
}
174+
175+
// Add readiness check for dynamic informer cache AFTER controllers are set up.
176+
// This ensures the discovery cache is populated before the hub agent is marked ready,
177+
// which is critical for all controllers that rely on dynamic resource discovery.
178+
// AddReadyzCheck adds an additional readiness check rather than replacing the one registered earlier, provided the name is different.
179+
// Both registered checks need to pass for the manager to be considered ready.
180+
if err := mgr.AddReadyzCheck("informer-cache", readiness.InformerReadinessChecker(validator.ResourceInformer)); err != nil {
181+
klog.ErrorS(err, "unable to set up informer cache readiness check")
170182
exitWithErrorFunc()
171183
}
172184

cmd/hubagent/workload/setup.go

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,7 @@ func SetupControllers(ctx context.Context, wg *sync.WaitGroup, mgr ctrl.Manager,
376376
}
377377
defaultFramework := framework.NewFramework(schedulerProfile, mgr)
378378
defaultSchedulingQueue := queue.NewSimplePlacementSchedulingQueue(
379-
queue.WithName(schedulerQueueName),
379+
schedulerQueueName, nil,
380380
)
381381
// we use one scheduler for every 10 concurrent placement
382382
defaultScheduler := scheduler.NewScheduler("DefaultScheduler", defaultFramework, defaultSchedulingQueue, mgr,
@@ -514,7 +514,23 @@ func SetupControllers(ctx context.Context, wg *sync.WaitGroup, mgr ctrl.Manager,
514514
}
515515
resourceChangeController := controller.NewController(resourceChangeControllerName, controller.ClusterWideKeyFunc, rcr.Reconcile, rateLimiter)
516516

517+
// Set up the InformerPopulator that runs on ALL pods (leader and followers)
518+
// This ensures all pods have synced informer caches for webhook validation
519+
klog.Info("Setting up informer populator")
520+
informerPopulator := &resourcewatcher.InformerPopulator{
521+
DiscoveryClient: discoverClient,
522+
RESTMapper: mgr.GetRESTMapper(),
523+
InformerManager: dynamicInformerManager,
524+
ResourceConfig: resourceConfig,
525+
}
526+
527+
if err := mgr.Add(informerPopulator); err != nil {
528+
klog.ErrorS(err, "Failed to setup informer populator")
529+
return err
530+
}
531+
517532
// Set up a runner that starts all the custom controllers we created above
533+
// This runs ONLY on the leader and adds event handlers to the informers created by InformerPopulator
518534
resourceChangeDetector := &resourcewatcher.ChangeDetector{
519535
DiscoveryClient: discoverClient,
520536
RESTMapper: mgr.GetRESTMapper(),

config/crd/bases/placement.kubernetes-fleet.io_clusterresourceplacements.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2482,6 +2482,10 @@ spec:
24822482
- RollingUpdate
24832483
- External
24842484
type: string
2485+
x-kubernetes-validations:
2486+
- message: cannot change rollout strategy type from 'External'
2487+
to other types
2488+
rule: '!(self != ''External'' && oldSelf == ''External'')'
24852489
type: object
24862490
required:
24872491
- resourceSelectors

config/crd/bases/placement.kubernetes-fleet.io_clusterstagedupdateruns.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2436,8 +2436,8 @@ spec:
24362436
- spec
24372437
type: object
24382438
x-kubernetes-validations:
2439-
- message: metadata.name max length is 127
2440-
rule: size(self.metadata.name) < 128
2439+
- message: metadata.name max length is 63
2440+
rule: size(self.metadata.name) < 64
24412441
served: true
24422442
storage: true
24432443
subresources:

config/crd/bases/placement.kubernetes-fleet.io_resourceplacements.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -992,6 +992,10 @@ spec:
992992
- RollingUpdate
993993
- External
994994
type: string
995+
x-kubernetes-validations:
996+
- message: cannot change rollout strategy type from 'External'
997+
to other types
998+
rule: '!(self != ''External'' && oldSelf == ''External'')'
995999
type: object
9961000
required:
9971001
- resourceSelectors

0 commit comments

Comments
 (0)