Skip to content

Commit baaeed5

Browse files
committed
add comments
Signed-off-by: Wantong Jiang <[email protected]>
1 parent 2595a0c commit baaeed5

File tree

4 files changed

+127
-117
lines changed

4 files changed

+127
-117
lines changed

tools/fleet/cmd/drain/drain.go

Lines changed: 76 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,66 @@ var (
5050
clusterName string
5151
)
5252

53+
// NewCmdDrain creates a new drain command
54+
func NewCmdDrain() *cobra.Command {
55+
cmd := &cobra.Command{
56+
Use: "drain",
57+
Short: "Drain a member cluster",
58+
Long: "Drain a member cluster by cordoning it and removing propagated resources",
59+
RunE: func(command *cobra.Command, args []string) error {
60+
return runDrain()
61+
},
62+
}
63+
64+
// Add flags specific to drain command
65+
cmd.Flags().StringVar(&hubClusterContext, "hubClusterContext", "", "kubectl context for the hub cluster (required)")
66+
cmd.Flags().StringVar(&clusterName, "clusterName", "", "name of the member cluster (required)")
67+
68+
// Mark required flags
69+
_ = cmd.MarkFlagRequired("hubClusterContext")
70+
_ = cmd.MarkFlagRequired("clusterName")
71+
72+
return cmd
73+
}
74+
75+
func runDrain() error {
76+
_, hubClient, err := setupClient()
77+
if err != nil {
78+
return err
79+
}
80+
81+
ctx := context.Background()
82+
drainHelper := &helper{
83+
hubClient: hubClient,
84+
clusterName: clusterName,
85+
}
86+
87+
isDrainSuccessful, err := drainHelper.Drain(ctx)
88+
if err != nil {
89+
log.Fatalf("failed to drain member cluster %s: %v", clusterName, err)
90+
}
91+
92+
if isDrainSuccessful {
93+
log.Printf("drain was successful for cluster %s", clusterName)
94+
} else {
95+
log.Printf("drain was not successful for cluster %s", clusterName)
96+
}
97+
98+
log.Printf("retrying drain to ensure all resources propagated from hub cluster are evicted")
99+
isDrainRetrySuccessful, err := drainHelper.Drain(ctx)
100+
if err != nil {
101+
log.Fatalf("failed to drain cluster on retry %s: %v", clusterName, err)
102+
}
103+
if isDrainRetrySuccessful {
104+
log.Printf("drain retry was successful for cluster %s", clusterName)
105+
} else {
106+
log.Printf("drain retry was not successful for cluster %s", clusterName)
107+
}
108+
109+
log.Printf("reminder: uncordon the cluster %s to remove cordon taint if needed", clusterName)
110+
return nil
111+
}
112+
53113
// setupClient creates and configures the Kubernetes client
54114
func setupClient() (*runtime.Scheme, client.Client, error) {
55115
scheme := runtime.NewScheme()
@@ -69,12 +129,12 @@ func setupClient() (*runtime.Scheme, client.Client, error) {
69129
return scheme, hubClient, nil
70130
}
71131

72-
type drainHelper struct {
132+
type helper struct {
73133
hubClient client.Client
74134
clusterName string
75135
}
76136

77-
func (h *drainHelper) Drain(ctx context.Context) (bool, error) {
137+
func (h *helper) Drain(ctx context.Context) (bool, error) {
78138
if err := h.cordon(ctx); err != nil {
79139
return false, fmt.Errorf("failed to cordon member cluster %s: %w", h.clusterName, err)
80140
}
@@ -91,6 +151,7 @@ func (h *drainHelper) Drain(ctx context.Context) (bool, error) {
91151
}
92152

93153
isDrainSuccessful := true
154+
// create eviction objects for all <crpName, targetCluster>.
94155
for crpName := range crpNameMap {
95156
evictionName, err := generateDrainEvictionName(crpName, h.clusterName)
96157
if err != nil {
@@ -118,6 +179,7 @@ func (h *drainHelper) Drain(ctx context.Context) (bool, error) {
118179

119180
log.Printf("Created eviction %s for CRP %s targeting member cluster %s", evictionName, crpName, h.clusterName)
120181

182+
// wait until evictions reach a terminal state.
121183
var eviction placementv1beta1.ClusterResourcePlacementEviction
122184
err = wait.ExponentialBackoffWithContext(ctx, retry.DefaultBackoff, func(ctx context.Context) (bool, error) {
123185
if err := h.hubClient.Get(ctx, types.NamespacedName{Name: evictionName}, &eviction); err != nil {
@@ -130,8 +192,10 @@ func (h *drainHelper) Drain(ctx context.Context) (bool, error) {
130192
return false, fmt.Errorf("failed to wait for eviction %s for CRP %s targeting member cluster %s to reach terminal state: %w", evictionName, crpName, h.clusterName, err)
131193
}
132194

195+
// TODO: add safeguards to check if eviction conditions are set to unknown.
133196
validCondition := eviction.GetCondition(string(placementv1beta1.PlacementEvictionConditionTypeValid))
134197
if validCondition != nil && validCondition.Status == metav1.ConditionFalse {
198+
// check to see if CRP is missing or CRP is being deleted or CRB is missing.
135199
if validCondition.Reason == condition.EvictionInvalidMissingCRPMessage ||
136200
validCondition.Reason == condition.EvictionInvalidDeletingCRPMessage ||
137201
validCondition.Reason == condition.EvictionInvalidMissingCRBMessage {
@@ -146,7 +210,7 @@ func (h *drainHelper) Drain(ctx context.Context) (bool, error) {
146210
continue
147211
}
148212
log.Printf("eviction %s was executed successfully for CRP %s targeting member cluster %s", evictionName, crpName, h.clusterName)
149-
213+
// log each cluster scoped resource evicted for CRP.
150214
clusterScopedResourceIdentifiers, err := h.collectClusterScopedResourcesSelectedByCRP(ctx, crpName)
151215
if err != nil {
152216
log.Printf("failed to collect cluster scoped resources selected by CRP %s: %v", crpName, err)
@@ -160,32 +224,36 @@ func (h *drainHelper) Drain(ctx context.Context) (bool, error) {
160224
return isDrainSuccessful, nil
161225
}
162226

163-
func (h *drainHelper) cordon(ctx context.Context) error {
227+
func (h *helper) cordon(ctx context.Context) error {
228+
// add taint to member cluster to ensure resources aren't scheduled on it.
164229
return retry.RetryOnConflict(retry.DefaultRetry, func() error {
165230
var mc clusterv1beta1.MemberCluster
166231
if err := h.hubClient.Get(ctx, types.NamespacedName{Name: h.clusterName}, &mc); err != nil {
167232
return err
168233
}
169234

235+
// search to see cordonTaint already exists on the cluster.
170236
for i := range mc.Spec.Taints {
171237
if mc.Spec.Taints[i] == toolsutils.CordonTaint {
172238
return nil
173239
}
174240
}
175241

242+
// add taint to member cluster to cordon.
176243
mc.Spec.Taints = append(mc.Spec.Taints, toolsutils.CordonTaint)
177244

178245
return h.hubClient.Update(ctx, &mc)
179246
})
180247
}
181248

182-
func (h *drainHelper) fetchClusterResourcePlacementNamesToEvict(ctx context.Context) (map[string]bool, error) {
249+
func (h *helper) fetchClusterResourcePlacementNamesToEvict(ctx context.Context) (map[string]bool, error) {
183250
var crbList placementv1beta1.ClusterResourceBindingList
184251
if err := h.hubClient.List(ctx, &crbList); err != nil {
185252
return map[string]bool{}, fmt.Errorf("failed to list cluster resource bindings: %w", err)
186253
}
187254

188255
crpNameMap := make(map[string]bool)
256+
// find all unique CRP names for which eviction needs to occur.
189257
for i := range crbList.Items {
190258
crb := crbList.Items[i]
191259
if crb.Spec.TargetCluster == h.clusterName && crb.DeletionTimestamp == nil {
@@ -200,14 +268,15 @@ func (h *drainHelper) fetchClusterResourcePlacementNamesToEvict(ctx context.Cont
200268
return crpNameMap, nil
201269
}
202270

203-
func (h *drainHelper) collectClusterScopedResourcesSelectedByCRP(ctx context.Context, crpName string) ([]placementv1beta1.ResourceIdentifier, error) {
271+
func (h *helper) collectClusterScopedResourcesSelectedByCRP(ctx context.Context, crpName string) ([]placementv1beta1.ResourceIdentifier, error) {
204272
var crp placementv1beta1.ClusterResourcePlacement
205273
if err := h.hubClient.Get(ctx, types.NamespacedName{Name: crpName}, &crp); err != nil {
206274
return nil, fmt.Errorf("failed to get ClusterResourcePlacement %s: %w", crpName, err)
207275
}
208276

209277
var resourcesPropagated []placementv1beta1.ResourceIdentifier
210278
for _, selectedResource := range crp.Status.SelectedResources {
279+
// only collect cluster scoped resources.
211280
if len(selectedResource.Namespace) == 0 {
212281
resourcesPropagated = append(resourcesPropagated, selectedResource)
213282
}
@@ -218,6 +287,7 @@ func (h *drainHelper) collectClusterScopedResourcesSelectedByCRP(ctx context.Con
218287
func generateDrainEvictionName(crpName, targetCluster string) (string, error) {
219288
evictionName := fmt.Sprintf(drainEvictionNameFormat, crpName, targetCluster, uuid.NewUUID()[:uuidLength])
220289

290+
// check to see if eviction name is a valid DNS1123 subdomain name https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names.
221291
if errs := validation.IsDNS1123Subdomain(evictionName); len(errs) != 0 {
222292
return "", fmt.Errorf("failed to format a qualified name for drain eviction object with CRP name %s, cluster name %s: %v", crpName, targetCluster, errs)
223293
}
@@ -236,63 +306,3 @@ func generateResourceIdentifierKey(r placementv1beta1.ResourceIdentifier) string
236306
}
237307
return fmt.Sprintf(resourceIdentifierKeyFormat, r.Group, r.Version, r.Kind, r.Namespace, r.Name)
238308
}
239-
240-
// NewCmdDrain creates a new drain command
241-
func NewCmdDrain() *cobra.Command {
242-
cmd := &cobra.Command{
243-
Use: "drain",
244-
Short: "Drain a member cluster",
245-
Long: "Drain a member cluster by cordoning it and removing propagated resources",
246-
RunE: func(command *cobra.Command, args []string) error {
247-
return runDrain()
248-
},
249-
}
250-
251-
// Add flags specific to drain command
252-
cmd.Flags().StringVar(&hubClusterContext, "hubClusterContext", "", "kubectl context for the hub cluster (required)")
253-
cmd.Flags().StringVar(&clusterName, "clusterName", "", "name of the member cluster (required)")
254-
255-
// Mark required flags
256-
_ = cmd.MarkFlagRequired("hubClusterContext")
257-
_ = cmd.MarkFlagRequired("clusterName")
258-
259-
return cmd
260-
}
261-
262-
func runDrain() error {
263-
_, hubClient, err := setupClient()
264-
if err != nil {
265-
return err
266-
}
267-
268-
ctx := context.Background()
269-
drainHelper := &drainHelper{
270-
hubClient: hubClient,
271-
clusterName: clusterName,
272-
}
273-
274-
isDrainSuccessful, err := drainHelper.Drain(ctx)
275-
if err != nil {
276-
log.Fatalf("failed to drain member cluster %s: %v", clusterName, err)
277-
}
278-
279-
if isDrainSuccessful {
280-
log.Printf("drain was successful for cluster %s", clusterName)
281-
} else {
282-
log.Printf("drain was not successful for cluster %s", clusterName)
283-
}
284-
285-
log.Printf("retrying drain to ensure all resources propagated from hub cluster are evicted")
286-
isDrainRetrySuccessful, err := drainHelper.Drain(ctx)
287-
if err != nil {
288-
log.Fatalf("failed to drain cluster on retry %s: %v", clusterName, err)
289-
}
290-
if isDrainRetrySuccessful {
291-
log.Printf("drain retry was successful for cluster %s", clusterName)
292-
} else {
293-
log.Printf("drain retry was not successful for cluster %s", clusterName)
294-
}
295-
296-
log.Printf("reminder: uncordon the cluster %s to remove cordon taint if needed", clusterName)
297-
return nil
298-
}

tools/fleet/cmd/drain/drain_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ func TestFetchClusterResourcePlacementNamesToEvict(t *testing.T) {
156156
WithScheme(scheme).
157157
WithObjects(objects...).
158158
Build()
159-
h := drainHelper{
159+
h := helper{
160160
hubClient: fakeClient,
161161
clusterName: tc.targetCluster,
162162
}
@@ -273,7 +273,7 @@ func TestCollectClusterScopedResourcesSelectedByCRP(t *testing.T) {
273273
WithObjects(objects...).
274274
Build()
275275

276-
h := drainHelper{
276+
h := helper{
277277
hubClient: fakeClient,
278278
}
279279

@@ -479,7 +479,7 @@ func TestCordon(t *testing.T) {
479479
WithObjects(objects...).
480480
Build()
481481

482-
h := drainHelper{
482+
h := helper{
483483
hubClient: fakeClient,
484484
clusterName: "test-cluster",
485485
}

tools/fleet/cmd/uncordon/uncordon.go

Lines changed: 44 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,48 @@ var (
3636
clusterName string
3737
)
3838

39+
// NewCmdUncordon creates a new uncordon command
40+
func NewCmdUncordon() *cobra.Command {
41+
cmd := &cobra.Command{
42+
Use: "uncordon",
43+
Short: "Uncordon a member cluster",
44+
Long: "Uncordon a previously drained member cluster by removing the cordon taint",
45+
RunE: func(command *cobra.Command, args []string) error {
46+
return runUncordon()
47+
},
48+
}
49+
50+
// Add flags specific to uncordon command
51+
cmd.Flags().StringVar(&hubClusterContext, "hubClusterContext", "", "kubectl context for the hub cluster (required)")
52+
cmd.Flags().StringVar(&clusterName, "clusterName", "", "name of the member cluster (required)")
53+
54+
// Mark required flags
55+
_ = cmd.MarkFlagRequired("hubClusterContext")
56+
_ = cmd.MarkFlagRequired("clusterName")
57+
58+
return cmd
59+
}
60+
61+
func runUncordon() error {
62+
_, hubClient, err := setupClient()
63+
if err != nil {
64+
return err
65+
}
66+
67+
ctx := context.Background()
68+
uncordonHelper := &helper{
69+
hubClient: hubClient,
70+
clusterName: clusterName,
71+
}
72+
73+
if err := uncordonHelper.Uncordon(ctx); err != nil {
74+
log.Fatalf("failed to uncordon cluster %s: %v", clusterName, err)
75+
}
76+
77+
log.Printf("uncordoned member cluster %s", clusterName)
78+
return nil
79+
}
80+
3981
// setupClient creates and configures the Kubernetes client
4082
func setupClient() (*runtime.Scheme, client.Client, error) {
4183
scheme := runtime.NewScheme()
@@ -55,12 +97,12 @@ func setupClient() (*runtime.Scheme, client.Client, error) {
5597
return scheme, hubClient, nil
5698
}
5799

58-
type uncordonHelper struct {
100+
type helper struct {
59101
hubClient client.Client
60102
clusterName string
61103
}
62104

63-
func (h *uncordonHelper) Uncordon(ctx context.Context) error {
105+
func (h *helper) Uncordon(ctx context.Context) error {
64106
return retry.RetryOnConflict(retry.DefaultRetry, func() error {
65107
var mc clusterv1beta1.MemberCluster
66108
if err := h.hubClient.Get(ctx, types.NamespacedName{Name: h.clusterName}, &mc); err != nil {
@@ -84,45 +126,3 @@ func (h *uncordonHelper) Uncordon(ctx context.Context) error {
84126
return h.hubClient.Update(ctx, &mc)
85127
})
86128
}
87-
88-
// NewCmdUncordon creates a new uncordon command
89-
func NewCmdUncordon() *cobra.Command {
90-
cmd := &cobra.Command{
91-
Use: "uncordon",
92-
Short: "Uncordon a member cluster",
93-
Long: "Uncordon a previously drained member cluster by removing the cordon taint",
94-
RunE: func(command *cobra.Command, args []string) error {
95-
return runUncordon()
96-
},
97-
}
98-
99-
// Add flags specific to uncordon command
100-
cmd.Flags().StringVar(&hubClusterContext, "hubClusterContext", "", "kubectl context for the hub cluster (required)")
101-
cmd.Flags().StringVar(&clusterName, "clusterName", "", "name of the member cluster (required)")
102-
103-
// Mark required flags
104-
_ = cmd.MarkFlagRequired("hubClusterContext")
105-
_ = cmd.MarkFlagRequired("clusterName")
106-
107-
return cmd
108-
}
109-
110-
func runUncordon() error {
111-
_, hubClient, err := setupClient()
112-
if err != nil {
113-
return err
114-
}
115-
116-
ctx := context.Background()
117-
uncordonHelper := &uncordonHelper{
118-
hubClient: hubClient,
119-
clusterName: clusterName,
120-
}
121-
122-
if err := uncordonHelper.Uncordon(ctx); err != nil {
123-
log.Fatalf("failed to uncordon cluster %s: %v", clusterName, err)
124-
}
125-
126-
log.Printf("uncordoned member cluster %s", clusterName)
127-
return nil
128-
}

0 commit comments

Comments
 (0)