Skip to content

Commit 2aa77bb

Browse files
committed
address comments
Signed-off-by: Zhiying Lin <[email protected]>
1 parent 0ceca9a commit 2aa77bb

File tree

10 files changed

+267
-12
lines changed

10 files changed

+267
-12
lines changed

apis/placement/v1/resourcesnapshot_types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ import (
5151
//
5252
// Each snapshot (excluding the first snapshot) MUST have the following annotations:
5353
// - `SubindexOfResourceSnapshotAnnotation` to store the subindex of resource snapshot in the group.
54+
//
55+
// Snapshot may have the following annotations to indicate the time of next resourceSnapshot candidate detected by the controller:
56+
// - `NextResourceSnapshotCandidateDetectionTime` to store the time of next resourceSnapshot candidate detected by the controller.
5457
type ClusterResourceSnapshot struct {
5558
metav1.TypeMeta `json:",inline"`
5659
metav1.ObjectMeta `json:"metadata,omitempty"`

apis/placement/v1beta1/resourcesnapshot_types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ const (
4040
// SubindexOfResourceSnapshotAnnotation is the annotation to store the subindex of resource snapshot in the group.
4141
SubindexOfResourceSnapshotAnnotation = fleetPrefix + "subindex-of-resource-snapshot"
4242

43+
// NextResourceSnapshotCandidateDetectionTimeAnnotation is the annotation to store the time of next resourceSnapshot candidate detected by the controller.
44+
NextResourceSnapshotCandidateDetectionTimeAnnotation = fleetPrefix + "next-resource-snapshot-candidate-detection-time"
45+
4346
// ResourceSnapshotNameFmt is resourcePolicySnapshot name format: {CRPName}-{resourceIndex}-snapshot.
4447
ResourceSnapshotNameFmt = "%s-%d-snapshot"
4548

charts/hub-agent/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,4 +40,4 @@ _See [helm install](https://helm.sh/docs/helm/helm_install/) for command documen
4040
| `ConcurrentResourceChangeSyncs` | Max concurrent resourceChange reconcilers | `20` |
4141
| `logFileMaxSize` | Max log file size before rotation | `1000000` |
4242
| `MaxFleetSizeSupported` | Max number of member clusters supported | `100` |
43-
| `resourceSnapshotCreationInterval` | Interval for resource snapshot creation | `30s` |
43+
| `resourceSnapshotCreationInterval` | Interval for resource snapshot creation | `1m` |

charts/hub-agent/values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ enableGuardRail: true
1818
webhookClientConnectionType: service
1919
forceDeleteWaitTime: 15m0s
2020
clusterUnhealthyThreshold: 3m0s
21-
resourceSnapshotCreationInterval: 30s
21+
resourceSnapshotCreationInterval: 1m
2222

2323
namespace:
2424
fleet-system

cmd/hubagent/options/options.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ func NewOptions() *Options {
125125
EnableStagedUpdateRunAPIs: true,
126126
EnablePprof: false,
127127
PprofPort: 6065,
128-
ResourceSnapshotCreationInterval: 30 * time.Second,
128+
ResourceSnapshotCreationInterval: 1 * time.Minute,
129129
}
130130
}
131131

@@ -172,7 +172,7 @@ func (o *Options) AddFlags(flags *flag.FlagSet) {
172172
flags.BoolVar(&o.EnablePprof, "enable-pprof", false, "If set, the pprof profiling is enabled.")
173173
flags.IntVar(&o.PprofPort, "pprof-port", 6065, "The port for pprof profiling.")
174174
flags.BoolVar(&o.DenyModifyMemberClusterLabels, "deny-modify-member-cluster-labels", false, "If set, users not in the system:masters cannot modify member cluster labels.")
175-
flags.DurationVar(&o.ResourceSnapshotCreationInterval, "resource-snapshot-creation-interval", 30*time.Second, "The interval at which resource snapshots are created.")
175+
flags.DurationVar(&o.ResourceSnapshotCreationInterval, "resource-snapshot-creation-interval", 1*time.Minute, "The interval at which resource snapshots are created.")
176176

177177
o.RateLimiterOpts.AddFlags(flags)
178178
}

pkg/controllers/clusterresourceplacement/controller.go

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -536,12 +536,13 @@ func (r *Reconciler) getOrCreateClusterResourceSnapshot(ctx context.Context, crp
536536
if latestResourceSnapshot != nil && latestResourceSnapshotHash != resourceHash && latestResourceSnapshot.Labels[fleetv1beta1.IsLatestSnapshotLabel] == strconv.FormatBool(true) {
537537
// When the latest resource snapshot without the isLastest label, it means it fails to create the new
538538
// resource snapshot in the last reconcile and we don't need to check and delay the request.
539-
if since := time.Since(latestResourceSnapshot.CreationTimestamp.Time); since < r.ResourceSnapshotCreationInterval {
540-
// If the latest resource snapshot is created less than configured the resourceSnapshotCreationInterval,
541-
// requeue the request to avoid too frequent update.
542-
klog.V(2).InfoS("The latest resource snapshot is just created, skipping the update", "clusterResourcePlacement", crpKObj,
543-
"clusterResourceSnapshot", klog.KObj(latestResourceSnapshot), "creationTime", latestResourceSnapshot.CreationTimestamp, "configuredResourceSnapshotCreationInterval", r.ResourceSnapshotCreationInterval, "afterDuration", r.ResourceSnapshotCreationInterval-since)
544-
return ctrl.Result{Requeue: true, RequeueAfter: r.ResourceSnapshotCreationInterval - since}, latestResourceSnapshot, nil
539+
res, error := r.shouldCreateNewResourceSnapshotNow(ctx, latestResourceSnapshot)
540+
if error != nil {
541+
return ctrl.Result{}, nil, error
542+
}
543+
if res.Requeue {
544+
// If the latest resource snapshot is not ready to be updated, we requeue the request.
545+
return res, latestResourceSnapshot, nil
545546
}
546547

547548
// set the latest label to false first to make sure there is only one or none active resource snapshot
@@ -587,6 +588,48 @@ func (r *Reconciler) getOrCreateClusterResourceSnapshot(ctx context.Context, crp
587588
return ctrl.Result{}, latestResourceSnapshot, nil
588589
}
589590

591+
// shouldCreateNewResourceSnapshotNow checks whether it is ready to create the new resource snapshot to avoid too frequent creation
592+
// based on the configured ResourceSnapshotCreationInterval.
593+
func (r *Reconciler) shouldCreateNewResourceSnapshotNow(ctx context.Context, latestResourceSnapshot *fleetv1beta1.ClusterResourceSnapshot) (ctrl.Result, error) {
594+
// We reserve half of the resourceSnapshotCreationInterval to allow the controller to bundle all the resource changes into one snapshot.
595+
// For example, if the interval is 1m, the first resource change will be captured starting from 30s.
596+
// And then the next 30s will be used to caputre all the resource changes into one snapshot.
597+
snapshotKObj := klog.KObj(latestResourceSnapshot)
598+
now := time.Now()
599+
half := r.ResourceSnapshotCreationInterval / 2
600+
if since := now.Sub(latestResourceSnapshot.CreationTimestamp.Time); since < half {
601+
// If the latest resource snapshot is created less than configured the resourceSnapshotCreationInterval,
602+
// requeue the request to avoid too frequent update.
603+
klog.V(2).InfoS("The latest resource snapshot is just created, skipping the update",
604+
"clusterResourceSnapshot", snapshotKObj, "creationTime", latestResourceSnapshot.CreationTimestamp, "configuredResourceSnapshotCreationInterval", r.ResourceSnapshotCreationInterval, "afterDuration", half-since)
605+
return ctrl.Result{Requeue: true, RequeueAfter: half - since}, nil
606+
}
607+
nextResourceSnapshotCandidateDetectionTime, err := annotations.ExtractNextResourceSnapshotCandidateDetectionTimeFromResourceSnapshot(latestResourceSnapshot)
608+
if nextResourceSnapshotCandidateDetectionTime.IsZero() || err != nil {
609+
if err != nil {
610+
klog.ErrorS(controller.NewUnexpectedBehaviorError(err), "Failed to get the NextResourceSnapshotCandidateDetectionTimeAnnotation", "clusterResourceSnapshot", snapshotKObj)
611+
}
612+
// If the annoation is not set, set next resource snapshot candidate detection time is now.
613+
if latestResourceSnapshot.Annotations == nil {
614+
latestResourceSnapshot.Annotations = make(map[string]string)
615+
}
616+
latestResourceSnapshot.Annotations[fleetv1beta1.NextResourceSnapshotCandidateDetectionTimeAnnotation] = now.Format(time.RFC3339)
617+
if err := r.Client.Update(ctx, latestResourceSnapshot); err != nil {
618+
klog.ErrorS(err, "Failed to update the NextResourceSnapshotCandidateDetectionTimeAnnotation", "clusterResourceSnapshot", snapshotKObj)
619+
return ctrl.Result{}, controller.NewUpdateIgnoreConflictError(err)
620+
}
621+
klog.V(2).InfoS("Set the next-resource-snapshot-candidate-detection-time to now", "clusterResourceSnapshot", snapshotKObj, "nextResourceSnapshotCandidateDetectionTime", now)
622+
return ctrl.Result{Requeue: true, RequeueAfter: half}, nil
623+
}
624+
if delay := now.Sub(nextResourceSnapshotCandidateDetectionTime); delay < half {
625+
// If the next resource snapshot candidate detection time is not reached, we requeue the request to avoid too frequent update.
626+
klog.V(2).InfoS("The next resource snapshot candidate has not reached the half of the ResourceSnapshotCreationInterval, skipping the creation",
627+
"clusterResourceSnapshot", snapshotKObj, "nextResourceSnapshotCandidateDetectionTime", nextResourceSnapshotCandidateDetectionTime, "configuredResourceSnapshotCreationInterval", r.ResourceSnapshotCreationInterval, "afterDuration", half-delay)
628+
return ctrl.Result{Requeue: true, RequeueAfter: half - delay}, nil
629+
}
630+
return ctrl.Result{}, nil
631+
}
632+
590633
// buildMasterClusterResourceSnapshot builds and returns the master cluster resource snapshot for the latest resource snapshot index and selected resources.
591634
func buildMasterClusterResourceSnapshot(latestResourceSnapshotIndex, resourceSnapshotCount, envelopeObjCount int, crpName, resourceHash string, selectedResources []fleetv1beta1.ResourceContent) *fleetv1beta1.ClusterResourceSnapshot {
592635
return &fleetv1beta1.ClusterResourceSnapshot{

pkg/controllers/clusterresourceplacement/controller_test.go

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4477,3 +4477,106 @@ func TestDetermineRolloutStateForCRPWithExternalRolloutStrategy(t *testing.T) {
44774477
})
44784478
}
44794479
}
4480+
4481+
func TestShouldCreateNewResourceSnapshotNow(t *testing.T) {
4482+
interval := 60 * time.Second
4483+
half := interval / 2
4484+
now := time.Now()
4485+
4486+
cases := []struct {
4487+
name string
4488+
creationTime time.Time
4489+
annotationValue string
4490+
wantAnnoation bool
4491+
wantRequeue bool
4492+
}{
4493+
{
4494+
name: "recently created resource snapshot",
4495+
creationTime: now.Add(-half + 5*time.Second),
4496+
wantRequeue: true,
4497+
},
4498+
{
4499+
name: "existing resource snapshot without annotation",
4500+
creationTime: now.Add(-2 * half),
4501+
// no annotation → sets it and requeues
4502+
annotationValue: "",
4503+
wantAnnoation: true,
4504+
wantRequeue: true,
4505+
},
4506+
{
4507+
name: "existing resource snapshot with expired annotation",
4508+
creationTime: now.Add(-2 * half),
4509+
annotationValue: now.Add(-half).Format(time.RFC3339),
4510+
// annotation far in past → no requeue
4511+
wantAnnoation: true,
4512+
wantRequeue: false,
4513+
},
4514+
{
4515+
name: "existing resource snapshot with invalid annotation",
4516+
creationTime: now.Add(-2 * half),
4517+
annotationValue: "invalid-value",
4518+
// annotation far in past → no requeue
4519+
wantAnnoation: true,
4520+
wantRequeue: true,
4521+
},
4522+
{
4523+
name: "existing resource snapshot with recently added annotation",
4524+
creationTime: now.Add(-2 * half),
4525+
annotationValue: now.Add(-half + 5*time.Second).Format(time.RFC3339),
4526+
// annotation exists but within half interval → requeue
4527+
wantAnnoation: true,
4528+
wantRequeue: true,
4529+
},
4530+
}
4531+
4532+
for _, tc := range cases {
4533+
t.Run(tc.name, func(t *testing.T) {
4534+
// initialize a snapshot with given creation time and annotation
4535+
snapshot := &fleetv1beta1.ClusterResourceSnapshot{
4536+
ObjectMeta: metav1.ObjectMeta{
4537+
Name: "test-snapshot",
4538+
CreationTimestamp: metav1.Time{Time: tc.creationTime},
4539+
Annotations: map[string]string{},
4540+
},
4541+
}
4542+
if tc.annotationValue != "" {
4543+
snapshot.Annotations[fleetv1beta1.NextResourceSnapshotCandidateDetectionTimeAnnotation] = tc.annotationValue
4544+
}
4545+
4546+
// use fake client seeded with the snapshot
4547+
scheme := serviceScheme(t)
4548+
client := fake.NewClientBuilder().
4549+
WithScheme(scheme).
4550+
WithRuntimeObjects(snapshot.DeepCopy()).
4551+
Build()
4552+
4553+
r := &Reconciler{
4554+
Client: client,
4555+
ResourceSnapshotCreationInterval: interval,
4556+
}
4557+
4558+
ctx := context.Background()
4559+
if err := client.Get(ctx, types.NamespacedName{Name: snapshot.Name}, snapshot); err != nil {
4560+
t.Fatalf("Failed to get snapshot: %v", err)
4561+
}
4562+
got, err := r.shouldCreateNewResourceSnapshotNow(ctx, snapshot)
4563+
if err != nil {
4564+
t.Fatalf("shouldCreateNewResourceSnapshotNow() failed: %v", err)
4565+
}
4566+
if got.Requeue != tc.wantRequeue {
4567+
t.Errorf("shouldCreateNewResourceSnapshotNow() = %v, want %v", got.Requeue, tc.wantRequeue)
4568+
}
4569+
if tc.wantRequeue {
4570+
if got.RequeueAfter <= 0 || got.RequeueAfter > half {
4571+
t.Errorf("shouldCreateNewResourceSnapshotNow() = RequeueAfter got %v, want in (0, %v], ", got.RequeueAfter, half)
4572+
}
4573+
}
4574+
if err := client.Get(ctx, types.NamespacedName{Name: snapshot.Name}, snapshot); err != nil {
4575+
t.Fatalf("failed to get snapshot after shouldCreateNewResourceSnapshotNow: %v", err)
4576+
}
4577+
if gotAnnotation := len(snapshot.Annotations[fleetv1beta1.NextResourceSnapshotCandidateDetectionTimeAnnotation]) != 0; tc.wantAnnoation != gotAnnotation {
4578+
t.Errorf("shouldCreateNewResourceSnapshotNow() = annotation %v, want %v", snapshot.Annotations[fleetv1beta1.NextResourceSnapshotCandidateDetectionTimeAnnotation], tc.wantAnnoation)
4579+
}
4580+
})
4581+
}
4582+
}

pkg/utils/annotations/annotations.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ package annotations
2020
import (
2121
"fmt"
2222
"strconv"
23+
"time"
2324

2425
"sigs.k8s.io/controller-runtime/pkg/client"
2526

@@ -98,3 +99,17 @@ func ExtractNumberOfEnvelopeObjFromResourceSnapshot(snapshot *fleetv1beta1.Clust
9899
}
99100
return envelopeObjCount, nil
100101
}
102+
103+
// ExtractNextResourceSnapshotCandidateDetectionTimeFromResourceSnapshot extracts the next resource snapshot candidate detection time from the annotations of a clusterResourceSnapshot.
104+
// If the annotation does not exist, it returns 0 duration.
105+
func ExtractNextResourceSnapshotCandidateDetectionTimeFromResourceSnapshot(snapshot *fleetv1beta1.ClusterResourceSnapshot) (time.Time, error) {
106+
nextDetectionTimeStr, ok := snapshot.Annotations[fleetv1beta1.NextResourceSnapshotCandidateDetectionTimeAnnotation]
107+
if !ok {
108+
return time.Time{}, nil
109+
}
110+
nextDetectionTime, err := time.Parse(time.RFC3339, nextDetectionTimeStr)
111+
if err != nil {
112+
return time.Time{}, fmt.Errorf("invalid annotation %s: %s is not a valid RFC3339 time: %w", fleetv1beta1.NextResourceSnapshotCandidateDetectionTimeAnnotation, nextDetectionTimeStr, err)
113+
}
114+
return nextDetectionTime, nil
115+
}

pkg/utils/annotations/annotations_test.go

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package annotations
1818

1919
import (
2020
"testing"
21+
"time"
2122

2223
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2324

@@ -394,3 +395,59 @@ func TestExtractNumberOfEnvelopeObjFromResourceSnapshot(t *testing.T) {
394395
})
395396
}
396397
}
398+
399+
func TestExtractNextResourceSnapshotCandidateDetectionTimeFromResourceSnapshot(t *testing.T) {
400+
validTime := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
401+
testCases := []struct {
402+
name string
403+
snapshot *fleetv1beta1.ClusterResourceSnapshot
404+
want time.Time
405+
wantError bool
406+
}{
407+
{
408+
name: "valid annotation",
409+
snapshot: &fleetv1beta1.ClusterResourceSnapshot{
410+
ObjectMeta: metav1.ObjectMeta{
411+
Name: snapshotName,
412+
Annotations: map[string]string{
413+
fleetv1beta1.NextResourceSnapshotCandidateDetectionTimeAnnotation: validTime.Format(time.RFC3339),
414+
},
415+
},
416+
},
417+
want: validTime,
418+
},
419+
{
420+
name: "no annotation means no next detection time",
421+
snapshot: &fleetv1beta1.ClusterResourceSnapshot{
422+
ObjectMeta: metav1.ObjectMeta{
423+
Name: snapshotName,
424+
},
425+
},
426+
want: time.Time{},
427+
},
428+
{
429+
name: "invalid annotation format",
430+
snapshot: &fleetv1beta1.ClusterResourceSnapshot{
431+
ObjectMeta: metav1.ObjectMeta{
432+
Name: snapshotName,
433+
Annotations: map[string]string{
434+
fleetv1beta1.NextResourceSnapshotCandidateDetectionTimeAnnotation: "invalid-time",
435+
},
436+
},
437+
},
438+
wantError: true,
439+
},
440+
}
441+
442+
for _, tc := range testCases {
443+
t.Run(tc.name, func(t *testing.T) {
444+
got, err := ExtractNextResourceSnapshotCandidateDetectionTimeFromResourceSnapshot(tc.snapshot)
445+
if gotErr := err != nil; gotErr != tc.wantError {
446+
t.Fatalf("ExtractNextResourceSnapshotCandidateDetectionTimeFromResourceSnapshot() got err %v, want err %v", err, tc.wantError)
447+
}
448+
if !tc.wantError && got != tc.want {
449+
t.Errorf("ExtractNextResourceSnapshotCandidateDetectionTimeFromResourceSnapshot() got %s, want %s", got, tc.want)
450+
}
451+
})
452+
}
453+
}

test/e2e/README.md

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ test suites, follow the steps below:
1414
# Use a different path if the local set up is different.
1515
export KUBECONFIG=~/.kube/config
1616
export OUTPUT_TYPE=type=docker
17+
# optional, used to test the placement features with custom configurations
18+
# It defaults to 0m.
19+
# export RESOURCE_SNAPSHOT_CREATION_INTERVAL=1m
1720
./setup.sh ${number of member clusters}
1821
```
1922

@@ -25,10 +28,38 @@ test suites, follow the steps below:
2528
* Upload the images to the `Kind` clusters
2629
* Install the agent images
2730

28-
3. Run the command below to start running the tests:
31+
3. Run the command below to run non custom configuration e2e tests with the following command:
2932

3033
```sh
31-
ginkgo -v -p .
34+
ginkgo --label-filter="!custom" -v -p .
35+
```
36+
37+
or run the custom configuration e2e tests with the following command
38+
```sh
39+
ginkgo --label-filter="custom" -v -p .
40+
```
41+
42+
or create a launch.json in your vscode workspace.
43+
```yaml
44+
{
45+
"version": "0.2.0",
46+
"configurations": [
47+
{
48+
"name": "Launch E2E Tests",
49+
"type": "go",
50+
"request": "launch",
51+
"mode": "test",
52+
"program": "${workspaceFolder}/test/e2e",
53+
"args": [],
54+
"env": {
55+
"KUBECONFIG": "~/.kube/config",
56+
#"RESOURCE_SNAPSHOT_CREATION_INTERVAL": "1m",
57+
},
58+
"buildFlags": "-tags=e2e",
59+
"showLog": true
60+
}
61+
]
62+
}
3263
```
3364

3465
## Access the `Kind` clusters

0 commit comments

Comments
 (0)