|
| 1 | +// Copyright 2025 The Cockroach Authors. |
| 2 | +// |
| 3 | +// Use of this software is governed by the CockroachDB Software License |
| 4 | +// included in the /LICENSE file. |
| 5 | + |
| 6 | +package admission |
| 7 | + |
| 8 | +import ( |
| 9 | + "time" |
| 10 | + |
| 11 | + "github.com/cockroachdb/cockroach/pkg/settings/cluster" |
| 12 | + "github.com/cockroachdb/cockroach/pkg/util/admission/admissionpb" |
| 13 | + "github.com/cockroachdb/cockroach/pkg/util/log" |
| 14 | + "github.com/cockroachdb/cockroach/pkg/util/metric" |
| 15 | +) |
| 16 | + |
| 17 | +func makeElasticCPUGrantCoordinator( |
| 18 | + ambientCtx log.AmbientContext, st *cluster.Settings, registry *metric.Registry, |
| 19 | +) *ElasticCPUGrantCoordinator { |
| 20 | + schedulerLatencyListenerMetrics := makeSchedulerLatencyListenerMetrics() |
| 21 | + registry.AddMetricStruct(schedulerLatencyListenerMetrics) |
| 22 | + elasticCPUGranterMetrics := makeElasticCPUGranterMetrics() |
| 23 | + registry.AddMetricStruct(elasticCPUGranterMetrics) |
| 24 | + |
| 25 | + elasticWorkQueueMetrics := makeWorkQueueMetrics("elastic-cpu", registry, |
| 26 | + admissionpb.BulkNormalPri, admissionpb.NormalPri) |
| 27 | + |
| 28 | + elasticCPUGranter := newElasticCPUGranter(ambientCtx, st, elasticCPUGranterMetrics) |
| 29 | + schedulerLatencyListener := newSchedulerLatencyListener(ambientCtx, st, schedulerLatencyListenerMetrics, elasticCPUGranter) |
| 30 | + |
| 31 | + elasticCPUInternalWorkQueue := &WorkQueue{} |
| 32 | + initWorkQueue(elasticCPUInternalWorkQueue, ambientCtx, KVWork, "kv-elastic-cpu-queue", elasticCPUGranter, st, |
| 33 | + elasticWorkQueueMetrics, |
| 34 | + workQueueOptions{usesTokens: true}, nil /* knobs */) // will be closed by the embedding *ElasticCPUWorkQueue |
| 35 | + elasticCPUWorkQueue := makeElasticCPUWorkQueue(st, elasticCPUInternalWorkQueue, elasticCPUGranter, elasticCPUGranterMetrics) |
| 36 | + elasticCPUGrantCoordinator := newElasticCPUGrantCoordinator(elasticCPUGranter, elasticCPUWorkQueue, schedulerLatencyListener) |
| 37 | + elasticCPUGranter.setRequester(elasticCPUInternalWorkQueue) |
| 38 | + schedulerLatencyListener.setCoord(elasticCPUGrantCoordinator) |
| 39 | + return elasticCPUGrantCoordinator |
| 40 | +} |
| 41 | + |
// ElasticCPUGrantCoordinator coordinates grants for elastic CPU tokens; it has
// a single granter-requester pair. Since it's used for elastic CPU work, and
// the total allotment of CPU available for such work is reduced before getting
// close to CPU saturation (we observe 1ms+ p99 scheduling latencies when
// running at 65% utilization on 8vCPU machines, which is enough to affect
// foreground latencies), we don't want it to serve as a gatekeeper for
// SQL-level admission. All this informs why it's structured as a separate
// grant coordinator.
//
// TODO(irfansharif): Ideally we wouldn't use this separate
// ElasticCPUGrantCoordinator and would just make this part of the one
// GrantCoordinator, but given we're dealing with a different workClass
// (elasticWorkClass) for an existing WorkKind (KVWork), and not all APIs on
// the grant coordinator currently segment across the two, it was easier to
// copy over some of the mediating code instead (grant chains also don't apply
// in this scheme). Try to do something better here and revisit the existing
// abstractions; see
// github.com/cockroachdb/cockroach/pull/86638#pullrequestreview-1084437330.
type ElasticCPUGrantCoordinator struct {
	// SchedulerLatencyListener is the listener passed to
	// newElasticCPUGrantCoordinator.
	SchedulerLatencyListener SchedulerLatencyListener
	// ElasticCPUWorkQueue is the work queue closed by close() and handed to
	// every Pacer created by NewPacer.
	ElasticCPUWorkQueue *ElasticCPUWorkQueue
	// elasticCPUGranter is the granter that tryGrant delegates to.
	elasticCPUGranter *elasticCPUGranter
}
| 64 | + |
| 65 | +func newElasticCPUGrantCoordinator( |
| 66 | + elasticCPUGranter *elasticCPUGranter, |
| 67 | + elasticCPUWorkQueue *ElasticCPUWorkQueue, |
| 68 | + listener *schedulerLatencyListener, |
| 69 | +) *ElasticCPUGrantCoordinator { |
| 70 | + return &ElasticCPUGrantCoordinator{ |
| 71 | + elasticCPUGranter: elasticCPUGranter, |
| 72 | + ElasticCPUWorkQueue: elasticCPUWorkQueue, |
| 73 | + SchedulerLatencyListener: listener, |
| 74 | + } |
| 75 | +} |
| 76 | + |
| 77 | +func (e *ElasticCPUGrantCoordinator) close() { |
| 78 | + e.ElasticCPUWorkQueue.close() |
| 79 | +} |
| 80 | + |
| 81 | +// tryGrant is used to attempt to grant to waiting requests. |
| 82 | +func (e *ElasticCPUGrantCoordinator) tryGrant() { |
| 83 | + e.elasticCPUGranter.tryGrant() |
| 84 | +} |
| 85 | + |
| 86 | +// NewPacer implements the PacerMaker interface. |
| 87 | +func (e *ElasticCPUGrantCoordinator) NewPacer(unit time.Duration, wi WorkInfo) *Pacer { |
| 88 | + if e == nil { |
| 89 | + return nil |
| 90 | + } |
| 91 | + return &Pacer{ |
| 92 | + unit: unit, |
| 93 | + wi: wi, |
| 94 | + wq: e.ElasticCPUWorkQueue, |
| 95 | + } |
| 96 | +} |
0 commit comments