
Commit a2cdffb

Author: Arvind Thirumurugan
Message: add more checks
Parent: ab1e5e4

File tree: 7 files changed, +156 −54 lines

pkg/controllers/clusterresourceplacementeviction/controller.go

Lines changed: 1 addition & 16 deletions
@@ -169,7 +169,7 @@ func (r *Reconciler) executeEviction(ctx context.Context, validationResult *evic
 		return nil
 	}
 
-	if !isPlacementPresent(evictionTargetBinding) {
+	if !evictionutils.IsPlacementPresent(evictionTargetBinding) {
 		klog.V(2).InfoS("No resources have been placed for ClusterResourceBinding in target cluster",
 			"clusterResourcePlacementEviction", eviction.Name, "clusterResourceBinding", evictionTargetBinding.Name, "targetCluster", eviction.Spec.ClusterName)
 		markEvictionNotExecuted(eviction, condition.EvictionBlockedMissingPlacementMessage)
@@ -220,21 +220,6 @@ func (r *Reconciler) executeEviction(ctx context.Context, validationResult *evic
 		return nil
 	}
 
-// isPlacementPresent checks to see if placement on target cluster could be present.
-func isPlacementPresent(binding *placementv1beta1.ClusterResourceBinding) bool {
-	if binding.Spec.State == placementv1beta1.BindingStateBound {
-		return true
-	}
-	if binding.Spec.State == placementv1beta1.BindingStateUnscheduled {
-		currentAnnotation := binding.GetAnnotations()
-		previousState, exist := currentAnnotation[placementv1beta1.PreviousBindingStateAnnotation]
-		if exist && placementv1beta1.BindingState(previousState) == placementv1beta1.BindingStateBound {
-			return true
-		}
-	}
-	return false
-}
-
 // isEvictionAllowed calculates if eviction allowed based on available bindings and spec specified in placement disruption budget.
 func isEvictionAllowed(bindings []placementv1beta1.ClusterResourceBinding, crp placementv1beta1.ClusterResourcePlacement, db placementv1beta1.ClusterResourcePlacementDisruptionBudget) (bool, int) {
 	availableBindings := 0

pkg/utils/eviction/eviction.go

Lines changed: 15 additions & 0 deletions
@@ -20,3 +20,18 @@ func IsEvictionInTerminalState(eviction *placementv1beta1.ClusterResourcePlaceme
 	}
 	return false
 }
+
+// IsPlacementPresent checks to see if placement on target cluster could be present.
+func IsPlacementPresent(binding *placementv1beta1.ClusterResourceBinding) bool {
+	if binding.Spec.State == placementv1beta1.BindingStateBound {
+		return true
+	}
+	if binding.Spec.State == placementv1beta1.BindingStateUnscheduled {
+		currentAnnotation := binding.GetAnnotations()
+		previousState, exist := currentAnnotation[placementv1beta1.PreviousBindingStateAnnotation]
+		if exist && placementv1beta1.BindingState(previousState) == placementv1beta1.BindingStateBound {
+			return true
+		}
+	}
+	return false
+}
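
The exported helper keeps the semantics of the private function it replaces: a placement counts as present when the binding is bound, or when an unscheduled binding was previously bound. A minimal caller sketch, assuming the import path `go.goms.io/fleet/pkg/utils/eviction` implied by the file location (the binding fixtures here are hypothetical, not from this commit):

```go
package main

import (
	"fmt"

	placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1"
	evictionutils "go.goms.io/fleet/pkg/utils/eviction"
)

func main() {
	// Bound binding: resources are placed (or being placed) on the target
	// cluster, so an eviction has something to act on.
	bound := &placementv1beta1.ClusterResourceBinding{
		Spec: placementv1beta1.ResourceBindingSpec{State: placementv1beta1.BindingStateBound},
	}
	fmt.Println(evictionutils.IsPlacementPresent(bound)) // true

	// Scheduled binding: nothing has been placed yet, so there is
	// nothing to evict.
	scheduled := &placementv1beta1.ClusterResourceBinding{
		Spec: placementv1beta1.ResourceBindingSpec{State: placementv1beta1.BindingStateScheduled},
	}
	fmt.Println(evictionutils.IsPlacementPresent(scheduled)) // false
}
```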

pkg/utils/eviction/eviction_test.go

Lines changed: 59 additions & 1 deletion
@@ -83,7 +83,65 @@ func TestIsEvictionInTerminalState(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			if got := IsEvictionInTerminalState(tt.eviction); got != tt.want {
-				t.Errorf("IsEvictionInTerminalState test failed got = %v, want %v", got, tt.want)
+				t.Errorf("IsEvictionInTerminalState test failed got = %v, want = %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func TestIsPlacementPresent(t *testing.T) {
+	tests := []struct {
+		name    string
+		binding *placementv1beta1.ClusterResourceBinding
+		want    bool
+	}{
+		{
+			name: "Bound binding - placement present",
+			binding: &placementv1beta1.ClusterResourceBinding{
+				Spec: placementv1beta1.ResourceBindingSpec{
+					State: placementv1beta1.BindingStateBound,
+				},
+			},
+			want: true,
+		},
+		{
+			name: "Unscheduled binding with previous state bound - placement present",
+			binding: &placementv1beta1.ClusterResourceBinding{
+				Spec: placementv1beta1.ResourceBindingSpec{
+					State: placementv1beta1.BindingStateUnscheduled,
+				},
+				ObjectMeta: metav1.ObjectMeta{
+					Annotations: map[string]string{
+						placementv1beta1.PreviousBindingStateAnnotation: string(placementv1beta1.BindingStateBound),
+					},
+				},
+			},
+			want: true,
+		},
+		{
+			name: "Unscheduled binding with no previous state - placement not present",
+			binding: &placementv1beta1.ClusterResourceBinding{
+				Spec: placementv1beta1.ResourceBindingSpec{
+					State: placementv1beta1.BindingStateUnscheduled,
+				},
+			},
+			want: false,
+		},
+		{
+			name: "Scheduled binding - placement not present",
+			binding: &placementv1beta1.ClusterResourceBinding{
+				Spec: placementv1beta1.ResourceBindingSpec{
+					State: placementv1beta1.BindingStateScheduled,
+				},
+			},
+			want: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := IsPlacementPresent(tt.binding); got != tt.want {
+				t.Errorf("IsPlacementPresent test failed got = %v, want = %v", got, tt.want)
 			}
 		})
 	}

tools/draincluster/main.go

Lines changed: 46 additions & 12 deletions
@@ -64,11 +64,16 @@ func main() {
 		ClusterResourcePlacementResourcesMap: make(map[string][]placementv1beta1.ResourceIdentifier),
 	}
 
-	if err = drainCluster.cordonCluster(ctx); err != nil {
+	uncordonCluster := toolsutils.UncordonCluster{
+		HubClient:   hubClient,
+		ClusterName: *clusterName,
+	}
+
+	if err = drainCluster.cordon(ctx); err != nil {
 		log.Fatalf("failed to cordon member cluster %s: %v", drainCluster.ClusterName, err)
 	}
 
-	isDrainSuccessful, err := drainCluster.drainCluster(ctx)
+	isDrainSuccessful, err := drainCluster.drain(ctx)
 	if err != nil {
 		log.Fatalf("failed to drain member cluster %s: %v", drainCluster.ClusterName, err)
 	}
@@ -78,19 +83,24 @@ func main() {
 	} else {
 		log.Printf("drain was not successful for cluster %s, some evictions were not successfully executed", *clusterName)
 		log.Printf("retrying drain to evict the resources that were not successfully removed")
-		isDrainRetrySuccessful, err := drainCluster.drainCluster(ctx)
+		// reset ClusterResourcePlacementResourcesMap for retry.
+		drainCluster.ClusterResourcePlacementResourcesMap = map[string][]placementv1beta1.ResourceIdentifier{}
+		isDrainRetrySuccessful, err := drainCluster.drain(ctx)
 		if err != nil {
 			log.Fatalf("failed to drain cluster again %s: %v", drainCluster.ClusterName, err)
 		}
 		if isDrainRetrySuccessful {
 			log.Printf("drain retry was successful for cluster %s", *clusterName)
 		} else {
-			log.Printf("drain retry was not successful for cluster %s, some evictions were not successfully executed", *clusterName)
+			if err = uncordonCluster.Uncordon(ctx); err != nil {
+				log.Fatalf("failed to uncordon cluster %s: %v", *clusterName, err)
+			}
+			log.Fatalf("drain retry was not successful for cluster %s, some evictions were not successfully executed", *clusterName)
 		}
 	}
 }
 
-func (d *DrainCluster) cordonCluster(ctx context.Context) error {
+func (d *DrainCluster) cordon(ctx context.Context) error {
 	// add taint to member cluster to ensure resources aren't scheduled on it.
 	return retry.RetryOnConflict(retry.DefaultRetry, func() error {
 		var mc clusterv1beta1.MemberCluster
@@ -117,20 +127,44 @@ func (d *DrainCluster) cordonCluster(ctx context.Context) error {
 	})
 }
 
-func (d *DrainCluster) drainCluster(ctx context.Context) (bool, error) {
+func (d *DrainCluster) drain(ctx context.Context) (bool, error) {
 	var crpList placementv1beta1.ClusterResourcePlacementList
 	if err := d.hubClient.List(ctx, &crpList); err != nil {
 		return false, fmt.Errorf("failed to list cluster resource placements: %v", err)
 	}
 
 	// find all unique CRP names for which eviction needs to occur.
 	for i := range crpList.Items {
-		crpStatus := crpList.Items[i].Status
-		for j := range crpStatus.PlacementStatuses {
-			placementStatus := crpStatus.PlacementStatuses[j]
-			if placementStatus.ClusterName == d.ClusterName {
-				d.ClusterResourcePlacementResourcesMap[crpList.Items[i].Name] = resourcePropagatedByCRP(crpStatus.SelectedResources, placementStatus.FailedPlacements)
-				break
+		// list all bindings for the CRP.
+		crp := crpList.Items[i]
+		var crbList placementv1beta1.ClusterResourceBindingList
+		if err := d.hubClient.List(ctx, &crbList, client.MatchingLabels{placementv1beta1.CRPTrackingLabel: crp.Name}); err != nil {
+			return false, fmt.Errorf("failed to list cluster resource bindings for CRP %s: %v", crp.Name, err)
+		}
+		var targetBinding *placementv1beta1.ClusterResourceBinding
+		for j := range crbList.Items {
+			crb := crbList.Items[j]
+			if crb.Spec.TargetCluster == d.ClusterName {
+				targetBinding = &crb
+			}
+		}
+		if targetBinding != nil && targetBinding.DeletionTimestamp == nil {
+			// check if placement is present.
+			if !evictionutils.IsPlacementPresent(targetBinding) {
+				return false, fmt.Errorf("placement is not present in cluster %s for CRP %s, please retry drain once resources are propagated to the cluster", d.ClusterName, crp.Name)
+			}
+			isCRPStatusUpdated := false
+			crpStatus := crp.Status
+			for j := range crpStatus.PlacementStatuses {
+				placementStatus := crpStatus.PlacementStatuses[j]
+				if placementStatus.ClusterName == d.ClusterName {
+					isCRPStatusUpdated = true
+					d.ClusterResourcePlacementResourcesMap[crpList.Items[i].Name] = resourcePropagatedByCRP(crpStatus.SelectedResources, placementStatus.FailedPlacements)
+					break
+				}
+			}
+			if !isCRPStatusUpdated {
+				return false, fmt.Errorf("failed to determine all resources propagated to cluster %s for CRP %s", d.ClusterName, crp.Name)
 			}
 		}
 	}
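
The binding lookup that drain now performs per CRP can be read in isolation; a rough standalone sketch of that step follows (`findTargetBinding` is a hypothetical helper for illustration, not part of this commit):

```go
package drainsketch

import (
	"context"
	"fmt"

	"sigs.k8s.io/controller-runtime/pkg/client"

	placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1"
)

// findTargetBinding lists the ClusterResourceBindings that belong to a CRP
// (selected via the CRP tracking label) and returns the one whose target is
// clusterName, or nil when the CRP has no binding for that cluster.
func findTargetBinding(ctx context.Context, c client.Client, crpName, clusterName string) (*placementv1beta1.ClusterResourceBinding, error) {
	var crbList placementv1beta1.ClusterResourceBindingList
	if err := c.List(ctx, &crbList, client.MatchingLabels{placementv1beta1.CRPTrackingLabel: crpName}); err != nil {
		return nil, fmt.Errorf("failed to list cluster resource bindings for CRP %s: %w", crpName, err)
	}
	for i := range crbList.Items {
		// Copy the element before taking its address so the returned pointer
		// does not alias the loop iteration state.
		crb := crbList.Items[i]
		if crb.Spec.TargetCluster == clusterName {
			return &crb, nil
		}
	}
	return nil, nil
}
```

In the commit itself, a nil binding or one carrying a deletion timestamp is simply skipped, while a binding whose placement is not yet present aborts the drain with an error asking the operator to retry once resources have propagated.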

tools/uncordoncluster/README.md

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ To uncordon a member cluster connected to a fleet, you can use the `uncordon-clu
 uncordon a member cluster that has been cordoned using the `cordon-cluster` tool.
 
 ```
-go run tools/cordoncluster/main.go --hubClusterContext <hub-cluster-context> --clusterName <memberClusterName>
+go run tools/uncordoncluster/main.go --hubClusterContext <hub-cluster-context> --clusterName <memberClusterName>
 ```
 
 the tool currently is a go program that also takes the hub cluster context and the member cluster name as arguments.

tools/uncordoncluster/main.go

Lines changed: 5 additions & 24 deletions
@@ -6,8 +6,6 @@ import (
 	"log"
 
 	"k8s.io/apimachinery/pkg/runtime"
-	"k8s.io/apimachinery/pkg/types"
-	"k8s.io/client-go/util/retry"
 
 	clusterv1beta1 "go.goms.io/fleet/apis/cluster/v1beta1"
 	placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1"
@@ -41,30 +39,13 @@ func main() {
 		log.Fatalf("failed to create hub cluster client: %v", err)
 	}
 
-	var mc clusterv1beta1.MemberCluster
-	if err = hubClient.Get(ctx, types.NamespacedName{Name: *clusterName}, &mc); err != nil {
-		log.Fatalf("failed to get member cluster %s: %v", *clusterName, err)
+	uncordonCluster := toolsutils.UncordonCluster{
+		HubClient:   hubClient,
+		ClusterName: *clusterName,
 	}
 
-	// remove existing taints from member cluster.
-	err = retry.RetryOnConflict(retry.DefaultRetry, func() error {
-		var mc clusterv1beta1.MemberCluster
-		if err := hubClient.Get(ctx, types.NamespacedName{Name: *clusterName}, &mc); err != nil {
-			return err
-		}
-
-		if mc.Spec.Taints == nil {
-			return nil
-		}
-
-		// remove all taints from member cluster.
-		mc.Spec.Taints = nil
-
-		return hubClient.Update(ctx, &mc)
-	})
-
-	if err != nil {
-		log.Fatalf("failed to remove taints from member cluster %s: %v", *clusterName, err)
+	if err = uncordonCluster.Uncordon(ctx); err != nil {
+		log.Fatalf("failed to uncordon cluster %s: %v", *clusterName, err)
 	}
 
 	log.Printf("uncordoned member cluster %s", *clusterName)

tools/utils/common.go

Lines changed: 29 additions & 0 deletions
@@ -1,8 +1,14 @@
 package utils
 
 import (
+	"context"
 	"os"
 
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/client-go/util/retry"
+
+	clusterv1beta1 "go.goms.io/fleet/apis/cluster/v1beta1"
+
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/client-go/tools/clientcmd"
 	"sigs.k8s.io/controller-runtime/pkg/client"
@@ -12,6 +18,29 @@ var (
 	kubeConfigPath = os.Getenv("KUBECONFIG")
 )
 
+type UncordonCluster struct {
+	HubClient   client.Client
+	ClusterName string
+}
+
+func (u *UncordonCluster) Uncordon(ctx context.Context) error {
+	return retry.RetryOnConflict(retry.DefaultRetry, func() error {
+		var mc clusterv1beta1.MemberCluster
+		if err := u.HubClient.Get(ctx, types.NamespacedName{Name: u.ClusterName}, &mc); err != nil {
+			return err
+		}
+
+		if mc.Spec.Taints == nil {
+			return nil
+		}
+
+		// remove all taints from member cluster.
+		mc.Spec.Taints = nil
+
+		return u.HubClient.Update(ctx, &mc)
+	})
+}
+
 func GetClusterClientFromClusterContext(clusterContext string, scheme *runtime.Scheme) (client.Client, error) {
 	clusterConfig := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(
 		&clientcmd.ClientConfigLoadingRules{ExplicitPath: kubeConfigPath},
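
Both tools now share this type. A minimal wiring sketch, assuming the import path `go.goms.io/fleet/tools/utils` from the repo layout and a standard kubebuilder-style `AddToScheme` on the cluster API package (the context and cluster names are placeholders):

```go
package main

import (
	"context"
	"log"

	"k8s.io/apimachinery/pkg/runtime"

	clusterv1beta1 "go.goms.io/fleet/apis/cluster/v1beta1"
	toolsutils "go.goms.io/fleet/tools/utils"
)

func main() {
	ctx := context.Background()

	// The hub client needs the cluster APIs registered so it can read and
	// update MemberCluster objects.
	scheme := runtime.NewScheme()
	if err := clusterv1beta1.AddToScheme(scheme); err != nil {
		log.Fatalf("failed to register cluster v1beta1 types: %v", err)
	}

	hubClient, err := toolsutils.GetClusterClientFromClusterContext("hub-cluster-context", scheme)
	if err != nil {
		log.Fatalf("failed to create hub cluster client: %v", err)
	}

	uncordon := toolsutils.UncordonCluster{
		HubClient:   hubClient,
		ClusterName: "member-1",
	}
	// Uncordon retries on conflict and clears all taints from the member
	// cluster; it is a no-op when no taints are set.
	if err := uncordon.Uncordon(ctx); err != nil {
		log.Fatalf("failed to uncordon cluster: %v", err)
	}
	log.Printf("uncordoned member cluster member-1")
}
```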
