Skip to content

Commit 15f0b2e

Browse files
authored
chore: cleanup some logs (#1446)
- Simplify & fix applier/destroyer logs on success & failure - Add the component in front of a few logs to make it clear where in the code the log came from, to simplify debugging. - Log resource conflicts as warnings with explainations of how remediation will retry. - Only log the keys from the GVK maps in the remediator manager. - Fix remediator shutdown log to go to info instead of warning.
1 parent 2edae49 commit 15f0b2e

File tree

7 files changed

+53
-29
lines changed

7 files changed

+53
-29
lines changed

pkg/parse/updater.go

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -209,23 +209,20 @@ func (u *Updater) apply(ctx context.Context, objs []client.Object, commit string
209209
}
210210
}
211211
}
212-
klog.V(1).Info("Applier starting...")
212+
klog.Info("Applier starting...")
213213
start := time.Now()
214214
u.SyncErrorCache.ResetApplyErrors()
215215
objStatusMap, syncStats := u.Applier.Apply(ctx, eventHandler, objs)
216-
if syncStats.Empty() {
217-
klog.V(4).Info("Applier made no new progress")
218-
} else {
216+
if !syncStats.Empty() {
219217
klog.Infof("Applier made new progress: %s", syncStats.String())
220218
objStatusMap.Log(klog.V(0))
221219
}
222220
metrics.RecordApplyDuration(ctx, metrics.StatusTagKey(err), commit, start)
223221
if err != nil {
224-
klog.Warningf("Failed to apply declared resources: %v", err)
222+
klog.Warningf("Applier failed: %v", err)
225223
return err
226224
}
227-
klog.V(4).Info("Apply completed without error: all resources are up to date.")
228-
klog.V(3).Info("Applier stopped")
225+
klog.Info("Applier succeeded")
229226
return nil
230227
}
231228

pkg/reconciler/finalizer/base_finalizer.go

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,23 +39,20 @@ func (bf *baseFinalizer) destroy(ctx context.Context) status.MultiError {
3939
}
4040
}
4141
}
42-
klog.V(1).Info("Destroyer starting...")
42+
klog.Info("Destroyer starting...")
4343
// start := time.Now()
4444
objStatusMap, syncStats := bf.Destroyer.Destroy(ctx, eventHandler)
45-
if syncStats.Empty() {
46-
klog.V(4).Info("Destroyer made no new progress")
47-
} else {
45+
if !syncStats.Empty() {
4846
klog.Infof("Destroyer made new progress: %s", syncStats.String())
4947
objStatusMap.Log(klog.V(0))
5048
}
5149
// TODO: should we have a destroy duration metric?
5250
// We don't have the commit here, so we can't send the apply metric.
5351
// metrics.RecordApplyDuration(ctx, metrics.StatusTagKey(errs), commit, start)
5452
if err != nil {
55-
klog.Warningf("Failed to destroy declared resources: %v", err)
53+
klog.Warningf("Destroyer failed: %v", err)
5654
return err
5755
}
58-
klog.V(4).Info("Destroyer completed without error: all resources are deleted.")
59-
klog.V(3).Info("Applier stopped")
56+
klog.Info("Destroyer succeeded")
6057
return nil
6158
}

pkg/remediator/conflict/handler.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,10 @@ type Handler interface {
4545

4646
// Record a management conflict error, including log and metric.
4747
func Record(ctx context.Context, handler Handler, err status.ManagementConflictError, commit string) {
48-
klog.Errorf("Management conflict detected. "+
49-
"Reconciler %q received a watch event for object %q, which is manage by namespace reconciler %q. ",
48+
klog.Errorf("Remediator detected a management conflict: "+
49+
"reconciler %q received a watch event for object %q, which is managed by namespace reconciler %q. "+
50+
"To resolve the conflict, remove the object from one of the sources of truth "+
51+
"so that the object is only managed by one reconciler.",
5052
err.DesiredManager(), err.ObjectID(), err.CurrentManager())
5153
handler.AddConflictError(err.ObjectID(), err)
5254
// TODO: Use separate metrics for management conflicts vs resource conflicts

pkg/remediator/reconcile/reconciler.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,9 @@ func (r *reconciler) Remediate(ctx context.Context, id core.ID, obj client.Objec
9898
switch err.Code() {
9999
case syncerclient.ResourceConflictCode:
100100
// Record cache conflict (create/delete/update failure due to found/not-found/resource-version)
101+
klog.Warningf("Remediator encountered a resource conflict: "+
102+
"%v. To resolve the conflict, the remediator will fetch the latest object from the cluster "+
103+
"and requeue it for remediation", err)
101104
metrics.RecordResourceConflict(ctx, commit)
102105
case status.ManagementConflictErrorCode:
103106
if mce, ok := err.(status.ManagementConflictError); ok {

pkg/remediator/watch/filteredwatcher.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -350,8 +350,12 @@ func (w *filteredWatcher) start(ctx context.Context, resourceVersion string) (bo
350350
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
351351
return false, status.InternalWrapf(err, "failed to start watch for %s", w.gvk)
352352
} else if apierrors.IsNotFound(err) {
353+
statusErr := syncerclient.ConflictWatchResourceDoesNotExist(err, w.gvk)
354+
klog.Warningf("Remediator encountered a resource conflict: "+
355+
"%v. To resolve the conflict, the remediator will enqueue a resync "+
356+
"and restart the resource watch after the sync has succeeded.", statusErr)
353357
metrics.RecordResourceConflict(ctx, w.getLatestCommit())
354-
return false, syncerclient.ConflictWatchResourceDoesNotExist(err, w.gvk)
358+
return false, statusErr
355359
}
356360
return false, status.APIServerErrorf(err, "failed to start watch for %s", w.gvk)
357361
}

pkg/remediator/watch/manager.go

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"errors"
2020
"sync"
2121

22+
"golang.org/x/exp/maps"
2223
"k8s.io/apimachinery/pkg/api/meta"
2324
"k8s.io/apimachinery/pkg/labels"
2425
"k8s.io/apimachinery/pkg/runtime/schema"
@@ -147,7 +148,7 @@ func (m *Manager) AddWatches(ctx context.Context, gvkMap map[schema.GroupVersion
147148
defer m.mux.Unlock()
148149
m.watching = true
149150

150-
klog.V(3).Infof("AddWatches(%v)", gvkMap)
151+
klog.V(3).Infof("AddWatches(%v)", maps.Keys(gvkMap))
151152

152153
var startedWatches uint64
153154

@@ -165,7 +166,12 @@ func (m *Manager) AddWatches(ctx context.Context, gvkMap map[schema.GroupVersion
165166
if _, err := m.mapper.RESTMapping(gvk.GroupKind(), gvk.Version); err != nil {
166167
switch {
167168
case meta.IsNoMatchError(err):
168-
klog.Infof("Remediator skipped adding watch for resource %v: %v: resource watch will be started after apply is successful", gvk, err)
169+
statusErr := syncerclient.ConflictWatchResourceDoesNotExist(err, gvk)
170+
klog.Infof("Remediator skipped starting resource watch: "+
171+
"%v. The remediator will start the resource watch after the sync has succeeded.", statusErr)
172+
// This is expected behavior before a sync attempt.
173+
// It likely means a CR and CRD are in the same ApplySet.
174+
// So don't record a resource conflict metric or return an error here.
169175
default:
170176
errs = status.Append(errs, status.APIServerErrorWrap(err))
171177
}
@@ -180,9 +186,9 @@ func (m *Manager) AddWatches(ctx context.Context, gvkMap map[schema.GroupVersion
180186
}
181187

182188
if startedWatches > 0 {
183-
klog.Infof("The remediator made new progress: started %d new watches", startedWatches)
189+
klog.Infof("Remediator started %d new watches", startedWatches)
184190
} else {
185-
klog.V(4).Infof("The remediator made no new progress")
191+
klog.V(4).Infof("Remediator watches unchanged")
186192
}
187193
return errs
188194
}
@@ -200,7 +206,7 @@ func (m *Manager) UpdateWatches(ctx context.Context, gvkMap map[schema.GroupVers
200206
defer m.mux.Unlock()
201207
m.watching = true
202208

203-
klog.V(3).Infof("UpdateWatches(%v)", gvkMap)
209+
klog.V(3).Infof("UpdateWatches(%v)", maps.Keys(gvkMap))
204210

205211
m.needsUpdate = false
206212

@@ -228,8 +234,15 @@ func (m *Manager) UpdateWatches(ctx context.Context, gvkMap map[schema.GroupVers
228234
if _, err := m.mapper.RESTMapping(gvk.GroupKind(), gvk.Version); err != nil {
229235
switch {
230236
case meta.IsNoMatchError(err):
237+
statusErr := syncerclient.ConflictWatchResourceDoesNotExist(err, gvk)
238+
klog.Warningf("Remediator encountered a resource conflict: "+
239+
"%v. To resolve the conflict, the remediator will enqueue a resync "+
240+
"and restart the resource watch after the sync has succeeded.", statusErr)
241+
// This is unexpected behavior after a successful sync.
242+
// It likely means that some other controller deleted managed objects shortly after they were applied.
243+
// So record a resource conflict metric and return an error.
231244
metrics.RecordResourceConflict(ctx, commit)
232-
errs = status.Append(errs, syncerclient.ConflictWatchResourceDoesNotExist(err, gvk))
245+
errs = status.Append(errs, statusErr)
233246
default:
234247
errs = status.Append(errs, status.APIServerErrorWrap(err))
235248
}
@@ -244,9 +257,9 @@ func (m *Manager) UpdateWatches(ctx context.Context, gvkMap map[schema.GroupVers
244257
}
245258

246259
if startedWatches > 0 || stoppedWatches > 0 {
247-
klog.Infof("The remediator made new progress: started %d new watches, and stopped %d watches", startedWatches, stoppedWatches)
260+
klog.Infof("Remediator started %d new watches and stopped %d watches", startedWatches, stoppedWatches)
248261
} else {
249-
klog.V(4).Infof("The remediator made no new progress")
262+
klog.V(4).Infof("Remediator watches unchanged")
250263
}
251264
return errs
252265
}
@@ -284,10 +297,12 @@ func (m *Manager) startWatcher(ctx context.Context, gvk schema.GroupVersionKind,
284297
// threadsafe.
285298
func (m *Manager) runWatcher(ctx context.Context, r Runnable, gvk schema.GroupVersionKind) {
286299
if err := r.Run(ctx); err != nil {
287-
if errors.Is(err, context.Canceled) {
288-
klog.Infof("Watcher stopped for %s: %v", gvk, status.FormatSingleLine(err))
300+
// TODO: Make status.Error work with errors.Is unwrapping.
301+
// For now, check the Cause directly, to avoid logging a warning on shutdown.
302+
if errors.Is(err.Cause(), context.Canceled) {
303+
klog.Infof("Watcher stopped for %s: %v", gvk, context.Canceled)
289304
} else {
290-
klog.Warningf("Error running watcher for %s: %v", gvk, status.FormatSingleLine(err))
305+
klog.Warningf("Watcher errored for %s: %v", gvk, status.FormatSingleLine(err))
291306
}
292307
m.mux.Lock()
293308
delete(m.watcherMap, gvk)

pkg/resourcegroup/controllers/watch/manager.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ package watch
1616

1717
import (
1818
"context"
19+
"errors"
1920
"net/http"
2021
"sync"
2122

@@ -191,7 +192,12 @@ func (m *Manager) startWatcher(ctx context.Context, gvk schema.GroupVersionKind)
191192
// threadsafe.
192193
func (m *Manager) runWatcher(ctx context.Context, r Runnable, gvk schema.GroupVersionKind) {
193194
if err := r.Run(ctx); err != nil {
194-
klog.Warningf("Error running watcher for %s: %v", gvk.String(), err)
195+
// Avoid logging a warning on shutdown.
196+
if errors.Is(err, context.Canceled) {
197+
klog.Infof("Watcher stopped for %s: %v", gvk, context.Canceled)
198+
} else {
199+
klog.Warningf("Watcher errored for %s: %v", gvk, err)
200+
}
195201
m.mux.Lock()
196202
delete(m.watcherMap, gvk)
197203
m.needsUpdate = true

0 commit comments

Comments
 (0)