@@ -32,6 +32,7 @@ import (
3232 "k8s.io/apimachinery/pkg/runtime"
3333 "k8s.io/apimachinery/pkg/types"
3434 "k8s.io/apimachinery/pkg/util/sets"
35+ "k8s.io/client-go/tools/record"
3536
3637 ctrl "sigs.k8s.io/controller-runtime"
3738 "sigs.k8s.io/controller-runtime/pkg/client"
@@ -54,8 +55,9 @@ const (
5455// AppWrapperReconciler reconciles an appwrapper.
// It embeds client.Client, so client methods such as Status() are available
// directly on the reconciler, and it carries the event recorder that Reconcile
// calls (Event/Eventf) using the Unhealthy condition type as the event reason.
5556type AppWrapperReconciler struct {
5657 client.Client
57- Scheme * runtime.Scheme
58- Config * config.AppWrapperConfig
// Recorder is used by Reconcile to publish events about the AppWrapper.
// NOTE(review): the visible call sites invoke r.Recorder without a nil check,
// so it presumably must be populated when the reconciler is constructed — confirm
// against the manager setup code.
58+ Recorder record.EventRecorder
59+ Scheme * runtime.Scheme
60+ Config * config.AppWrapperConfig
5961}
6062
6163type podStatusSummary struct {
@@ -210,12 +212,14 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request)
210212 return ctrl.Result {RequeueAfter : 1 * time .Second }, nil
211213 }
212214 }
215+ detailMsg := fmt .Sprintf ("error creating components: %v" , err )
213216 meta .SetStatusCondition (& aw .Status .Conditions , metav1.Condition {
214217 Type : string (workloadv1beta2 .Unhealthy ),
215218 Status : metav1 .ConditionTrue ,
216219 Reason : "CreateFailed" ,
217- Message : fmt . Sprintf ( "error creating components: %v" , err ) ,
220+ Message : detailMsg ,
218221 })
222+ r .Recorder .Event (aw , v1 .EventTypeNormal , string (workloadv1beta2 .Unhealthy ), "CreateFailed: " + detailMsg )
219223 if fatal {
220224 return r .updateStatus (ctx , aw , workloadv1beta2 .AppWrapperFailed ) // always move to failed on fatal error
221225 } else {
@@ -240,25 +244,29 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request)
240244 }
241245
242246 // Detect externally deleted components and transition to Failed with no GracePeriod or retry
247+ detailMsg := fmt .Sprintf ("Only found %v deployed components, but was expecting %v" , compStatus .deployed , compStatus .expected )
243248 if compStatus .deployed != compStatus .expected {
244249 meta .SetStatusCondition (& aw .Status .Conditions , metav1.Condition {
245250 Type : string (workloadv1beta2 .Unhealthy ),
246251 Status : metav1 .ConditionTrue ,
247252 Reason : "MissingComponent" ,
248- Message : fmt . Sprintf ( "Only found %v deployed components, but was expecting %v" , compStatus . deployed , compStatus . expected ) ,
253+ Message : detailMsg ,
249254 })
255+ r .Recorder .Event (aw , v1 .EventTypeNormal , string (workloadv1beta2 .Unhealthy ), "MissingComponent: " + detailMsg )
250256 return r .updateStatus (ctx , aw , workloadv1beta2 .AppWrapperFailed )
251257 }
252258
253259 // If a component's controller has put it into a failed state, we do not need
254260 // to allow any further grace period. The situation will not self-correct.
261+ detailMsg = fmt .Sprintf ("Found %v failed components" , compStatus .failed )
255262 if compStatus .failed > 0 {
256263 meta .SetStatusCondition (& aw .Status .Conditions , metav1.Condition {
257264 Type : string (workloadv1beta2 .Unhealthy ),
258265 Status : metav1 .ConditionTrue ,
259266 Reason : "FailedComponent" ,
260- Message : fmt . Sprintf ( "Found %v failed components" , compStatus . failed ) ,
267+ Message : detailMsg ,
261268 })
269+ r .Recorder .Event (aw , v1 .EventTypeNormal , string (workloadv1beta2 .Unhealthy ), "FailedComponent: " + detailMsg )
262270 return r .resetOrFail (ctx , aw , podStatus .terminalFailure , 1 )
263271 }
264272
@@ -297,20 +305,22 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request)
297305 if now .Before (deadline ) {
298306 return ctrl.Result {RequeueAfter : deadline .Sub (now )}, r .Status ().Update (ctx , aw )
299307 } else {
308+ r .Recorder .Eventf (aw , v1 .EventTypeNormal , string (workloadv1beta2 .Unhealthy ), "FoundFailedPods: %v failed pods" , podStatus .failed )
300309 return r .resetOrFail (ctx , aw , podStatus .terminalFailure , 1 )
301310 }
302311 }
303312
304313 // Initiate migration of workloads that are using resources that Autopilot has flagged as unhealthy
314+ detailMsg = fmt .Sprintf ("Workload contains pods using unhealthy resources on Nodes: %v" , podStatus .unhealthyNodes )
305315 if len (podStatus .unhealthyNodes ) > 0 {
306316 meta .SetStatusCondition (& aw .Status .Conditions , metav1.Condition {
307317 Type : string (workloadv1beta2 .Unhealthy ),
308318 Status : metav1 .ConditionTrue ,
309319 Reason : "AutopilotUnhealthy" ,
310- Message : fmt . Sprintf ( "Workload contains pods using unhealthy resources on Nodes: %v" , podStatus . unhealthyNodes ) ,
320+ Message : detailMsg ,
311321 })
312- // Autopilot triggered evacuation does not increment retry count
313- return r .resetOrFail (ctx , aw , false , 0 )
322+ r . Recorder . Event ( aw , v1 . EventTypeNormal , string ( workloadv1beta2 . Unhealthy ), detailMsg )
323+ return r .resetOrFail (ctx , aw , false , 0 ) // Autopilot triggered evacuation does not increment retry count
314324 }
315325
316326 clearCondition (aw , workloadv1beta2 .Unhealthy , "FoundNoFailedPods" , "" )
@@ -344,6 +354,7 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request)
344354 Reason : "InsufficientPodsReady" ,
345355 Message : podDetailsMessage ,
346356 })
357+ r .Recorder .Event (aw , v1 .EventTypeNormal , string (workloadv1beta2 .Unhealthy ), "InsufficientPodsReady: " + podDetailsMessage )
347358 return r .resetOrFail (ctx , aw , podStatus .terminalFailure , 1 )
348359 }
349360
0 commit comments