@@ -75,6 +75,9 @@ type Reflector struct {
75
75
ShouldResync func () bool
76
76
// clock allows tests to manipulate time
77
77
clock clock.Clock
78
+ // paginatedResult defines whether pagination should be forced for list calls.
79
+ // It is set based on the result of the initial list call.
80
+ paginatedResult bool
78
81
// lastSyncResourceVersion is the resource version token last
79
82
// observed when doing a sync with the underlying store
80
83
// it is thread safe, but not synchronized with the underlying store
@@ -85,7 +88,12 @@ type Reflector struct {
85
88
// lastSyncResourceVersionMutex guards read/write access to lastSyncResourceVersion
86
89
lastSyncResourceVersionMutex sync.RWMutex
87
90
// WatchListPageSize is the requested chunk size of initial and resync watch lists.
88
- // Defaults to pager.PageSize.
91
+ // If unset, for consistent reads (RV="") or reads that opt into arbitrarily old data
92
+ // (RV="0") it will default to pager.PageSize, for the rest (RV != "" && RV != "0")
93
+ // it will turn off pagination to allow serving them from watch cache.
94
+ // NOTE: It should be used carefully as paginated lists are always served directly from
95
+ // etcd, which is significantly less efficient and may lead to serious performance and
96
+ // scalability problems.
89
97
WatchListPageSize int64
90
98
}
91
99
@@ -204,6 +212,7 @@ func (r *Reflector) ListAndWatch(stopCh <-chan struct{}) error {
204
212
initTrace := trace .New ("Reflector ListAndWatch" , trace.Field {"name" , r .name })
205
213
defer initTrace .LogIfLong (10 * time .Second )
206
214
var list runtime.Object
215
+ var paginatedResult bool
207
216
var err error
208
217
listCh := make (chan struct {}, 1 )
209
218
panicCh := make (chan interface {}, 1 )
@@ -218,19 +227,38 @@ func (r *Reflector) ListAndWatch(stopCh <-chan struct{}) error {
218
227
pager := pager .New (pager .SimplePageFunc (func (opts metav1.ListOptions ) (runtime.Object , error ) {
219
228
return r .listerWatcher .List (opts )
220
229
}))
221
- if r .WatchListPageSize != 0 {
230
+ switch {
231
+ case r .WatchListPageSize != 0 :
222
232
pager .PageSize = r .WatchListPageSize
233
+ case r .paginatedResult :
234
+ // We got a paginated result initially. Assume this resource and server honor
235
+ // paging requests (i.e. watch cache is probably disabled) and leave the default
236
+ // pager size set.
237
+ case options .ResourceVersion != "" && options .ResourceVersion != "0" :
238
+ // User didn't explicitly request pagination.
239
+ //
240
+ // With ResourceVersion != "", we have a possibility to list from watch cache,
241
+ // but we do that (for ResourceVersion != "0") only if Limit is unset.
242
+ // To avoid thundering herd on etcd (e.g. on master upgrades), we explicitly
243
+ // switch off pagination to force listing from watch cache (if enabled).
244
+ // With the existing semantic of RV (result is at least as fresh as provided RV),
245
+ // this is correct and doesn't lead to going back in time.
246
+ //
247
+ // We also don't turn off pagination for ResourceVersion="0", since watch cache
248
+ // is ignoring Limit in that case anyway, and if watch cache is not enabled
249
+ // we don't introduce regression.
250
+ pager .PageSize = 0
223
251
}
224
252
225
- list , err = pager .List (context .Background (), options )
253
+ list , paginatedResult , err = pager .List (context .Background (), options )
226
254
if isExpiredError (err ) {
227
255
r .setIsLastSyncResourceVersionExpired (true )
228
256
// Retry immediately if the resource version used to list is expired.
229
257
// The pager already falls back to full list if paginated list calls fail due to an "Expired" error on
230
258
// continuation pages, but the pager might not be enabled, or the full list might fail because the
231
259
// resource version it is listing at is expired, so we need to fall back to resourceVersion="" in all
232
260
// cases to recover and ensure the reflector makes forward progress.
233
- list , err = pager .List (context .Background (), metav1.ListOptions {ResourceVersion : r .relistResourceVersion ()})
261
+ list , paginatedResult , err = pager .List (context .Background (), metav1.ListOptions {ResourceVersion : r .relistResourceVersion ()})
234
262
}
235
263
close (listCh )
236
264
}()
@@ -244,6 +272,21 @@ func (r *Reflector) ListAndWatch(stopCh <-chan struct{}) error {
244
272
if err != nil {
245
273
return fmt .Errorf ("%s: Failed to list %v: %v" , r .name , r .expectedTypeName , err )
246
274
}
275
+
276
+ // We check if the list was paginated and if so set the paginatedResult based on that.
277
+ // However, we want to do that only for the initial list (which is the only case
278
+ // when we set ResourceVersion="0"). The reasoning behind it is that later, in some
279
+ // situations we may force listing directly from etcd (by setting ResourceVersion="")
280
+ // which will return paginated result, even if watch cache is enabled. However, in
281
+ // that case, we still want to prefer sending requests to watch cache if possible.
282
+ //
283
+ // Paginated result returned for request with ResourceVersion="0" means that watch
284
+ // cache is disabled and there are a lot of objects of a given type. In such case,
285
+ // there is no need to prefer listing from watch cache.
286
+ if options .ResourceVersion == "0" && paginatedResult {
287
+ r .paginatedResult = true
288
+ }
289
+
247
290
r .setIsLastSyncResourceVersionExpired (false ) // list was successful
248
291
initTrace .Step ("Objects listed" )
249
292
listMetaInterface , err := meta .ListAccessor (list )
@@ -320,7 +363,9 @@ func (r *Reflector) ListAndWatch(stopCh <-chan struct{}) error {
320
363
if err != nil {
321
364
switch {
322
365
case isExpiredError (err ):
323
- r .setIsLastSyncResourceVersionExpired (true )
366
+ // Don't set LastSyncResourceVersionExpired - LIST call with ResourceVersion=RV already
367
+ // has a semantic that it returns data at least as fresh as provided RV.
368
+ // So first try to LIST with setting RV to resource version of last observed object.
324
369
klog .V (4 ).Infof ("%s: watch of %v closed with: %v" , r .name , r .expectedTypeName , err )
325
370
case err == io .EOF :
326
371
// watch closed normally
@@ -344,8 +389,10 @@ func (r *Reflector) ListAndWatch(stopCh <-chan struct{}) error {
344
389
if err != errorStopRequested {
345
390
switch {
346
391
case isExpiredError (err ):
347
- r .setIsLastSyncResourceVersionExpired (true )
348
- klog .V (4 ).Infof ("%s: watch of %v ended with: %v" , r .name , r .expectedTypeName , err )
392
+ // Don't set LastSyncResourceVersionExpired - LIST call with ResourceVersion=RV already
393
+ // has a semantic that it returns data at least as fresh as provided RV.
394
+ // So first try to LIST with setting RV to resource version of last observed object.
395
+ klog .V (4 ).Infof ("%s: watch of %v closed with: %v" , r .name , r .expectedTypeName , err )
349
396
default :
350
397
klog .Warningf ("%s: watch of %v ended with: %v" , r .name , r .expectedTypeName , err )
351
398
}
0 commit comments