Skip to content

Commit d90b37f

Browse files
authored
Merge pull request kubernetes#88714 from MikeSpreitzer/apf-finer-metrics2
Extend API Priority and Fairness metrics
2 parents ef672c1 + c7b098a commit d90b37f

File tree

11 files changed

+186
-71
lines changed

11 files changed

+186
-71
lines changed

staging/src/k8s.io/apiserver/pkg/util/flowcontrol/apf_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -648,7 +648,7 @@ func (cfgCtl *configController) startRequest(ctx context.Context, rd RequestDige
648648
}
649649
startWaitingTime = time.Now()
650650
klog.V(7).Infof("startRequest(%#+v) => fsName=%q, distMethod=%#+v, plName=%q, numQueues=%d", rd, fs.Name, fs.Spec.DistinguisherMethod, plName, numQueues)
651-
req, idle := plState.queues.StartRequest(ctx, hashValue, rd.RequestInfo, rd.User)
651+
req, idle := plState.queues.StartRequest(ctx, hashValue, fs.Name, rd.RequestInfo, rd.User)
652652
if idle {
653653
cfgCtl.maybeReapLocked(plName, plState)
654654
}

staging/src/k8s.io/apiserver/pkg/util/flowcontrol/apf_filter.go

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,6 @@ import (
2121
"strconv"
2222
"time"
2323

24-
// TODO: decide whether to use the existing metrics, which
25-
// categorize according to mutating vs readonly, or make new
26-
// metrics because this filter does not pay attention to that
27-
// distinction
28-
29-
// "k8s.io/apiserver/pkg/endpoints/metrics"
30-
3124
"k8s.io/apimachinery/pkg/util/clock"
3225
"k8s.io/apiserver/pkg/util/flowcontrol/counter"
3326
fq "k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing"
@@ -109,6 +102,7 @@ func (cfgCtl *configController) Handle(ctx context.Context, requestDigest Reques
109102
if queued {
110103
metrics.ObserveWaitingDuration(pl.Name, fs.Name, strconv.FormatBool(req != nil), time.Since(startWaitingTime))
111104
}
105+
metrics.AddDispatch(pl.Name, fs.Name)
112106
executed = true
113107
startExecutionTime := time.Now()
114108
execFn()

staging/src/k8s.io/apiserver/pkg/util/flowcontrol/controller_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,11 +115,11 @@ func (cqs *ctlTestQueueSet) IsIdle() bool {
115115
return cqs.countActive == 0
116116
}
117117

118-
func (cqs *ctlTestQueueSet) StartRequest(ctx context.Context, hashValue uint64, descr1, descr2 interface{}) (req fq.Request, idle bool) {
118+
func (cqs *ctlTestQueueSet) StartRequest(ctx context.Context, hashValue uint64, fsName string, descr1, descr2 interface{}) (req fq.Request, idle bool) {
119119
cqs.cts.lock.Lock()
120120
defer cqs.cts.lock.Unlock()
121121
cqs.countActive++
122-
cqs.cts.t.Logf("Queued %#+v %#+v for %p QS=%s, countActive:=%d", descr1, descr2, cqs, cqs.qc.Name, cqs.countActive)
122+
cqs.cts.t.Logf("Queued %q %#+v %#+v for %p QS=%s, countActive:=%d", fsName, descr1, descr2, cqs, cqs.qc.Name, cqs.countActive)
123123
return &ctlTestRequest{cqs, cqs.qc.Name, descr1, descr2}, false
124124
}
125125

staging/src/k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing/interface.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ type QueueSet interface {
7676
// returned bool indicates whether the QueueSet was idle at the
7777
// moment of the return. Otherwise idle==false and the client
7878
// must call the Wait method of the Request exactly once.
79-
StartRequest(ctx context.Context, hashValue uint64, descr1, descr2 interface{}) (req Request, idle bool)
79+
StartRequest(ctx context.Context, hashValue uint64, fsName string, descr1, descr2 interface{}) (req Request, idle bool)
8080
}
8181

8282
// Request represents the remainder of the handling of one request

staging/src/k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing/queueset/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ go_test(
3333
"//staging/src/k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing:go_default_library",
3434
"//staging/src/k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing/testing:go_default_library",
3535
"//staging/src/k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing/testing/clock:go_default_library",
36+
"//staging/src/k8s.io/apiserver/pkg/util/flowcontrol/metrics:go_default_library",
3637
"//vendor/k8s.io/klog:go_default_library",
3738
],
3839
)

staging/src/k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing/queueset/queueset.go

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -218,10 +218,10 @@ const (
218218

219219
// StartRequest begins the process of handling a request. We take the
220220
// approach of updating the metrics about total requests queued and
221-
// executing on each path out of this method and Request::Wait. We do
222-
// not update those metrics in lower level functions because there can
223-
// be multiple lower level changes in one invocation here.
224-
func (qs *queueSet) StartRequest(ctx context.Context, hashValue uint64, descr1, descr2 interface{}) (fq.Request, bool) {
221+
// executing at each point where there is a change in that quantity,
222+
// because the metrics --- and only the metrics --- track that
223+
// quantity per FlowSchema.
224+
func (qs *queueSet) StartRequest(ctx context.Context, hashValue uint64, fsName string, descr1, descr2 interface{}) (fq.Request, bool) {
225225
qs.lockAndSyncTime()
226226
defer qs.lock.Unlock()
227227
var req *request
@@ -231,11 +231,11 @@ func (qs *queueSet) StartRequest(ctx context.Context, hashValue uint64, descr1,
231231
// Apply only concurrency limit, if zero queues desired
232232
if qs.qCfg.DesiredNumQueues < 1 {
233233
if qs.totRequestsExecuting >= qs.dCfg.ConcurrencyLimit {
234-
klog.V(5).Infof("QS(%s): rejecting request %#+v %#+v because %d are executing and the limit is %d", qs.qCfg.Name, descr1, descr2, qs.totRequestsExecuting, qs.dCfg.ConcurrencyLimit)
234+
klog.V(5).Infof("QS(%s): rejecting request %q %#+v %#+v because %d are executing and the limit is %d", qs.qCfg.Name, fsName, descr1, descr2, qs.totRequestsExecuting, qs.dCfg.ConcurrencyLimit)
235+
metrics.AddReject(qs.qCfg.Name, fsName, "concurrency-limit")
235236
return nil, qs.isIdleLocked()
236237
}
237-
req = qs.dispatchSansQueueLocked(ctx, descr1, descr2)
238-
metrics.UpdateFlowControlRequestsExecuting(qs.qCfg.Name, qs.totRequestsExecuting)
238+
req = qs.dispatchSansQueueLocked(ctx, fsName, descr1, descr2)
239239
return req, false
240240
}
241241

@@ -246,13 +246,12 @@ func (qs *queueSet) StartRequest(ctx context.Context, hashValue uint64, descr1,
246246
// 3) Reject current request if there is not enough concurrency shares and
247247
// we are at max queue length
248248
// 4) If not rejected, create a request and enqueue
249-
req = qs.timeoutOldRequestsAndRejectOrEnqueueLocked(ctx, hashValue, descr1, descr2)
250-
defer metrics.UpdateFlowControlRequestsInQueue(qs.qCfg.Name, qs.totRequestsWaiting)
249+
req = qs.timeoutOldRequestsAndRejectOrEnqueueLocked(ctx, hashValue, fsName, descr1, descr2)
251250
// req == nil means that the request was rejected - no remaining
252251
// concurrency shares and at max queue length already
253252
if req == nil {
254-
klog.V(5).Infof("QS(%s): rejecting request %#+v %#+v due to queue full", qs.qCfg.Name, descr1, descr2)
255-
metrics.AddReject(qs.qCfg.Name, "queue-full")
253+
klog.V(5).Infof("QS(%s): rejecting request %q %#+v %#+v due to queue full", qs.qCfg.Name, fsName, descr1, descr2)
254+
metrics.AddReject(qs.qCfg.Name, fsName, "queue-full")
256255
return nil, qs.isIdleLocked()
257256
}
258257

@@ -266,7 +265,6 @@ func (qs *queueSet) StartRequest(ctx context.Context, hashValue uint64, descr1,
266265
// fair queuing technique to pick a queue and dispatch a
267266
// request from that queue.
268267
qs.dispatchAsMuchAsPossibleLocked()
269-
defer metrics.UpdateFlowControlRequestsExecuting(qs.qCfg.Name, qs.totRequestsExecuting)
270268

271269
// ========================================================================
272270
// Step 3:
@@ -288,7 +286,7 @@ func (qs *queueSet) StartRequest(ctx context.Context, hashValue uint64, descr1,
288286
// known that the count does not need to be accurate.
289287
// BTW, the count only needs to be accurate in a test that
290288
// uses FakeEventClock::Run().
291-
klog.V(6).Infof("QS(%s): Context of request %#+v %#+v is Done", qs.qCfg.Name, descr1, descr2)
289+
klog.V(6).Infof("QS(%s): Context of request %q %#+v %#+v is Done", qs.qCfg.Name, fsName, descr1, descr2)
292290
qs.cancelWait(req)
293291
qs.goroutineDoneOrBlocked()
294292
}()
@@ -329,7 +327,7 @@ func (req *request) wait() (bool, bool) {
329327
switch decision {
330328
case decisionReject:
331329
klog.V(5).Infof("QS(%s): request %#+v %#+v timed out after being enqueued\n", qs.qCfg.Name, req.descr1, req.descr2)
332-
metrics.AddReject(qs.qCfg.Name, "time-out")
330+
metrics.AddReject(qs.qCfg.Name, req.fsName, "time-out")
333331
return false, qs.isIdleLocked()
334332
case decisionCancel:
335333
// TODO(aaron-prindle) add metrics for this case
@@ -400,19 +398,20 @@ func (qs *queueSet) getVirtualTimeRatioLocked() float64 {
400398
// returns the enqueued request on a successful enqueue
401399
// returns nil in the case that there is no available concurrency or
402400
// the queue length limit has been reached
403-
func (qs *queueSet) timeoutOldRequestsAndRejectOrEnqueueLocked(ctx context.Context, hashValue uint64, descr1, descr2 interface{}) *request {
401+
func (qs *queueSet) timeoutOldRequestsAndRejectOrEnqueueLocked(ctx context.Context, hashValue uint64, fsName string, descr1, descr2 interface{}) *request {
404402
// Start with the shuffle sharding, to pick a queue.
405403
queueIdx := qs.chooseQueueIndexLocked(hashValue, descr1, descr2)
406404
queue := qs.queues[queueIdx]
407405
// The next step is the logic to reject requests that have been waiting too long
408-
qs.removeTimedOutRequestsFromQueueLocked(queue)
406+
qs.removeTimedOutRequestsFromQueueLocked(queue, fsName)
409407
// NOTE: currently timeout is only checked for each new request. This means that there can be
410408
// requests that are in the queue longer than the timeout if there are no new requests
411409
// We prefer the simplicity over the promptness, at least for now.
412410

413411
// Create a request and enqueue
414412
req := &request{
415413
qs: qs,
414+
fsName: fsName,
416415
ctx: ctx,
417416
decision: lockingpromise.NewWriteOnce(&qs.lock, qs.counter),
418417
arrivalTime: qs.clock.Now(),
@@ -423,7 +422,7 @@ func (qs *queueSet) timeoutOldRequestsAndRejectOrEnqueueLocked(ctx context.Conte
423422
if ok := qs.rejectOrEnqueueLocked(req); !ok {
424423
return nil
425424
}
426-
metrics.ObserveQueueLength(qs.qCfg.Name, len(queue.requests))
425+
metrics.ObserveQueueLength(qs.qCfg.Name, fsName, len(queue.requests))
427426
return req
428427
}
429428

@@ -446,7 +445,7 @@ func (qs *queueSet) chooseQueueIndexLocked(hashValue uint64, descr1, descr2 inte
446445

447446
// removeTimedOutRequestsFromQueueLocked rejects old requests that have been enqueued
448447
// past the requestWaitLimit
449-
func (qs *queueSet) removeTimedOutRequestsFromQueueLocked(queue *queue) {
448+
func (qs *queueSet) removeTimedOutRequestsFromQueueLocked(queue *queue, fsName string) {
450449
timeoutIdx := -1
451450
now := qs.clock.Now()
452451
reqs := queue.requests
@@ -461,6 +460,7 @@ func (qs *queueSet) removeTimedOutRequestsFromQueueLocked(queue *queue) {
461460
req.decision.SetLocked(decisionReject)
462461
// get index for timed out requests
463462
timeoutIdx = i
463+
metrics.AddRequestsInQueues(qs.qCfg.Name, req.fsName, -1)
464464
} else {
465465
break
466466
}
@@ -505,6 +505,7 @@ func (qs *queueSet) enqueueLocked(request *request) {
505505
}
506506
queue.Enqueue(request)
507507
qs.totRequestsWaiting++
508+
metrics.AddRequestsInQueues(qs.qCfg.Name, request.fsName, 1)
508509
}
509510

510511
// dispatchAsMuchAsPossibleLocked runs a loop, as long as there
@@ -522,10 +523,11 @@ func (qs *queueSet) dispatchAsMuchAsPossibleLocked() {
522523
}
523524
}
524525

525-
func (qs *queueSet) dispatchSansQueueLocked(ctx context.Context, descr1, descr2 interface{}) *request {
526+
func (qs *queueSet) dispatchSansQueueLocked(ctx context.Context, fsName string, descr1, descr2 interface{}) *request {
526527
now := qs.clock.Now()
527528
req := &request{
528529
qs: qs,
530+
fsName: fsName,
529531
ctx: ctx,
530532
startTime: now,
531533
decision: lockingpromise.NewWriteOnce(&qs.lock, qs.counter),
@@ -535,8 +537,9 @@ func (qs *queueSet) dispatchSansQueueLocked(ctx context.Context, descr1, descr2
535537
}
536538
req.decision.SetLocked(decisionExecute)
537539
qs.totRequestsExecuting++
540+
metrics.AddRequestsExecuting(qs.qCfg.Name, fsName, 1)
538541
if klog.V(5) {
539-
klog.Infof("QS(%s) at r=%s v=%.9fs: immediate dispatch of request %#+v %#+v, qs will have %d executing", qs.qCfg.Name, now.Format(nsTimeFmt), qs.virtualTime, descr1, descr2, qs.totRequestsExecuting)
542+
klog.Infof("QS(%s) at r=%s v=%.9fs: immediate dispatch of request %q %#+v %#+v, qs will have %d executing", qs.qCfg.Name, now.Format(nsTimeFmt), qs.virtualTime, fsName, descr1, descr2, qs.totRequestsExecuting)
540543
}
541544
return req
542545
}
@@ -563,6 +566,8 @@ func (qs *queueSet) dispatchLocked() bool {
563566
qs.totRequestsWaiting--
564567
qs.totRequestsExecuting++
565568
queue.requestsExecuting++
569+
metrics.AddRequestsInQueues(qs.qCfg.Name, request.fsName, -1)
570+
metrics.AddRequestsExecuting(qs.qCfg.Name, request.fsName, 1)
566571
if klog.V(6) {
567572
klog.Infof("QS(%s) at r=%s v=%.9fs: dispatching request %#+v %#+v from queue %d with virtual start time %.9fs, queue will have %d waiting & %d executing", qs.qCfg.Name, request.startTime.Format(nsTimeFmt), qs.virtualTime, request.descr1, request.descr2, queue.index, queue.virtualStart, len(queue.requests), queue.requestsExecuting)
568573
}
@@ -590,6 +595,7 @@ func (qs *queueSet) cancelWait(req *request) {
590595
// remove the request
591596
queue.requests = append(queue.requests[:i], queue.requests[i+1:]...)
592597
qs.totRequestsWaiting--
598+
metrics.AddRequestsInQueues(qs.qCfg.Name, req.fsName, -1)
593599
break
594600
}
595601
}
@@ -634,8 +640,6 @@ func (qs *queueSet) finishRequestAndDispatchAsMuchAsPossible(req *request) bool
634640

635641
qs.finishRequestLocked(req)
636642
qs.dispatchAsMuchAsPossibleLocked()
637-
metrics.UpdateFlowControlRequestsInQueue(qs.qCfg.Name, qs.totRequestsWaiting)
638-
metrics.UpdateFlowControlRequestsExecuting(qs.qCfg.Name, qs.totRequestsExecuting)
639643
return qs.isIdleLocked()
640644
}
641645

@@ -644,6 +648,7 @@ func (qs *queueSet) finishRequestAndDispatchAsMuchAsPossible(req *request) bool
644648
// callback updates important state in the queueSet
645649
func (qs *queueSet) finishRequestLocked(r *request) {
646650
qs.totRequestsExecuting--
651+
metrics.AddRequestsExecuting(qs.qCfg.Name, r.fsName, -1)
647652

648653
if r.queue == nil {
649654
if klog.V(6) {

0 commit comments

Comments (0)