@@ -95,6 +95,37 @@ type Reflector struct {
95
95
// etcd, which is significantly less efficient and may lead to serious performance and
96
96
// scalability problems.
97
97
WatchListPageSize int64
98
+ // watchErrorHandler is called whenever ListAndWatch drops the connection with an error.
99
+ watchErrorHandler WatchErrorHandler
100
+ }
101
+
102
+ // The WatchErrorHandler is called whenever ListAndWatch drops the
103
+ // connection with an error. After calling this handler, the informer
104
+ // will backoff and retry.
105
+ //
106
+ // The default implementation looks at the error type and tries to log
107
+ // the error message at an appropriate level.
108
+ //
109
+ // Implementations of this handler may display the error message in other
110
+ // ways. Implementations should return quickly - any expensive processing
111
+ // should be offloaded.
112
+ type WatchErrorHandler func (r * Reflector , err error )
113
+
114
+ // DefaultWatchErrorHandler is the default implementation of WatchErrorHandler
115
+ func DefaultWatchErrorHandler (r * Reflector , err error ) {
116
+ switch {
117
+ case isExpiredError (err ):
118
+ // Don't set LastSyncResourceVersionExpired - LIST call with ResourceVersion=RV already
119
+ // has a semantic that it returns data at least as fresh as provided RV.
120
+ // So first try to LIST with setting RV to resource version of last observed object.
121
+ klog .V (4 ).Infof ("%s: watch of %v closed with: %v" , r .name , r .expectedTypeName , err )
122
+ case err == io .EOF :
123
+ // watch closed normally
124
+ case err == io .ErrUnexpectedEOF :
125
+ klog .V (1 ).Infof ("%s: Watch for %v closed with unexpected EOF: %v" , r .name , r .expectedTypeName , err )
126
+ default :
127
+ utilruntime .HandleError (fmt .Errorf ("%s: Failed to watch %v: %v" , r .name , r .expectedTypeName , err ))
128
+ }
98
129
}
99
130
100
131
var (
@@ -135,9 +166,10 @@ func NewNamedReflector(name string, lw ListerWatcher, expectedType interface{},
135
166
// We used to make the call every 1sec (1 QPS), the goal here is to achieve ~98% traffic reduction when
136
167
// API server is not healthy. With these parameters, backoff will stop at [30,60) sec interval which is
137
168
// 0.22 QPS. If we don't back off for 2min, assume API server is healthy and we reset the backoff.
138
- backoffManager : wait .NewExponentialBackoffManager (800 * time .Millisecond , 30 * time .Second , 2 * time .Minute , 2.0 , 1.0 , realClock ),
139
- resyncPeriod : resyncPeriod ,
140
- clock : realClock ,
169
+ backoffManager : wait .NewExponentialBackoffManager (800 * time .Millisecond , 30 * time .Second , 2 * time .Minute , 2.0 , 1.0 , realClock ),
170
+ resyncPeriod : resyncPeriod ,
171
+ clock : realClock ,
172
+ watchErrorHandler : WatchErrorHandler (DefaultWatchErrorHandler ),
141
173
}
142
174
r .setExpectedType (expectedType )
143
175
return r
@@ -175,7 +207,7 @@ func (r *Reflector) Run(stopCh <-chan struct{}) {
175
207
klog .V (2 ).Infof ("Starting reflector %s (%s) from %s" , r .expectedTypeName , r .resyncPeriod , r .name )
176
208
wait .BackoffUntil (func () {
177
209
if err := r .ListAndWatch (stopCh ); err != nil {
178
- utilruntime . HandleError ( err )
210
+ r . watchErrorHandler ( r , err )
179
211
}
180
212
}, r .backoffManager , true , stopCh )
181
213
klog .V (2 ).Infof ("Stopping reflector %s (%s) from %s" , r .expectedTypeName , r .resyncPeriod , r .name )
@@ -275,7 +307,7 @@ func (r *Reflector) ListAndWatch(stopCh <-chan struct{}) error {
275
307
case <- listCh :
276
308
}
277
309
if err != nil {
278
- return fmt .Errorf ("%s: Failed to list %v: %v" , r . name , r .expectedTypeName , err )
310
+ return fmt .Errorf ("failed to list %v: %v" , r .expectedTypeName , err )
279
311
}
280
312
281
313
// We check if the list was paginated and if so set the paginatedResult based on that.
@@ -296,17 +328,17 @@ func (r *Reflector) ListAndWatch(stopCh <-chan struct{}) error {
296
328
initTrace .Step ("Objects listed" )
297
329
listMetaInterface , err := meta .ListAccessor (list )
298
330
if err != nil {
299
- return fmt .Errorf ("%s: Unable to understand list result %#v: %v" , r . name , list , err )
331
+ return fmt .Errorf ("unable to understand list result %#v: %v" , list , err )
300
332
}
301
333
resourceVersion = listMetaInterface .GetResourceVersion ()
302
334
initTrace .Step ("Resource version extracted" )
303
335
items , err := meta .ExtractList (list )
304
336
if err != nil {
305
- return fmt .Errorf ("%s: Unable to understand list result %#v (%v)" , r . name , list , err )
337
+ return fmt .Errorf ("unable to understand list result %#v (%v)" , list , err )
306
338
}
307
339
initTrace .Step ("Objects extracted" )
308
340
if err := r .syncWith (items , resourceVersion ); err != nil {
309
- return fmt .Errorf ("%s: Unable to sync list result: %v" , r . name , err )
341
+ return fmt .Errorf ("unable to sync list result: %v" , err )
310
342
}
311
343
initTrace .Step ("SyncWith done" )
312
344
r .setLastSyncResourceVersion (resourceVersion )
@@ -366,19 +398,6 @@ func (r *Reflector) ListAndWatch(stopCh <-chan struct{}) error {
366
398
367
399
w , err := r .listerWatcher .Watch (options )
368
400
if err != nil {
369
- switch {
370
- case isExpiredError (err ):
371
- // Don't set LastSyncResourceVersionExpired - LIST call with ResourceVersion=RV already
372
- // has a semantic that it returns data at least as fresh as provided RV.
373
- // So first try to LIST with setting RV to resource version of last observed object.
374
- klog .V (4 ).Infof ("%s: watch of %v closed with: %v" , r .name , r .expectedTypeName , err )
375
- case err == io .EOF :
376
- // watch closed normally
377
- case err == io .ErrUnexpectedEOF :
378
- klog .V (1 ).Infof ("%s: Watch for %v closed with unexpected EOF: %v" , r .name , r .expectedTypeName , err )
379
- default :
380
- utilruntime .HandleError (fmt .Errorf ("%s: Failed to watch %v: %v" , r .name , r .expectedTypeName , err ))
381
- }
382
401
// If this is "connection refused" error, it means that most likely apiserver is not responsive.
383
402
// It doesn't make sense to re-list all objects because most likely we will be able to restart
384
403
// watch where we ended.
@@ -387,7 +406,7 @@ func (r *Reflector) ListAndWatch(stopCh <-chan struct{}) error {
387
406
time .Sleep (time .Second )
388
407
continue
389
408
}
390
- return nil
409
+ return err
391
410
}
392
411
393
412
if err := r .watchHandler (w , & resourceVersion , resyncerrc , stopCh ); err != nil {
0 commit comments