Commit dc9f816
[release-1.19] Make autoscaler instrument async so we can remove metric attributes when revisions go away (#16301)

* switch autoscaler metrics to be async - allowing removal
* allow removing kpa metrics when a revision goes away

Co-authored-by: Dave Protasowski <dprotaso@gmail.com>
1 parent ea7032d commit dc9f816
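
For context on the approach: OpenTelemetry's Go SDK offers no way to delete the attribute sets a synchronous instrument has already recorded, but asynchronous instruments report through a registered callback, and unregistering that callback drops all of its data points from future collections. Below is a minimal sketch of that pattern against the real `go.opentelemetry.io/otel/metric` API; the `revisionMetrics` type, `newRevisionMetrics`, and the single `kn.revision.pods.desired` gauge are illustrative stand-ins, not the commit's actual code.

```go
package example

import (
	"context"
	"sync"

	"go.opentelemetry.io/otel/attribute"
	"go.opentelemetry.io/otel/metric"
)

// revisionMetrics reports its gauge through an async callback so that
// unregistering the callback removes this revision's data points.
type revisionMetrics struct {
	mu           sync.Mutex
	desiredPods  int64
	attrs        attribute.Set
	registration metric.Registration
}

func newRevisionMetrics(meter metric.Meter, attrs attribute.Set) (*revisionMetrics, error) {
	m := &revisionMetrics{attrs: attrs}

	desired, err := meter.Int64ObservableGauge("kn.revision.pods.desired")
	if err != nil {
		return nil, err
	}

	// The callback runs on every collection and observes whatever
	// value Record stored last.
	m.registration, err = meter.RegisterCallback(
		func(_ context.Context, o metric.Observer) error {
			m.mu.Lock()
			defer m.mu.Unlock()
			o.ObserveInt64(desired, m.desiredPods, metric.WithAttributeSet(m.attrs))
			return nil
		}, desired)
	if err != nil {
		return nil, err
	}
	return m, nil
}

// Record stashes the latest value; it is exported on the next collection.
func (m *revisionMetrics) Record(desiredPods int64) {
	m.mu.Lock()
	defer m.mu.Unlock()
	m.desiredPods = desiredPods
}

// OnDelete unregisters the callback, so later collections carry no
// data points with this revision's attributes.
func (m *revisionMetrics) OnDelete() {
	_ = m.registration.Unregister()
}
```

This shape lines up with the diff below: `Scale` now stores values via a single `a.metrics.Record(...)` call instead of branching into `RecordRPS`/`RecordConcurrency`, and `newMetrics` receives `deciderSpec.ScalingMetric` up front, presumably so the async callback knows which metric family to observe.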

File tree

10 files changed: +310 -175 lines changed

pkg/autoscaler/metrics/stats_scraper.go

Lines changed: 2 additions & 1 deletion

@@ -203,7 +203,8 @@ func newServiceScraperWithClient(
 			semconv.K8SNamespaceName(m.ObjectMeta.Namespace),
 			metrics.ServiceNameKey.With(svcName),
 			metrics.ConfigurationNameKey.With(cfgName),
-			metrics.RevisionNameKey.With(revisionName),
+			// TODO: Re-enable when OTel has the ability to remove attributes
+			// metrics.RevisionNameKey.With(revisionName),
 		),
 	}
 }

pkg/autoscaler/scaling/autoscaler.go

Lines changed: 12 additions & 19 deletions

@@ -91,7 +91,7 @@ func newAutoscaler(
 	deciderSpec *DeciderSpec,
 	delayWindow *max.TimeWindow,
 ) *autoscaler {
-	metrics := newMetrics(mp, attrs)
+	metrics := newMetrics(mp, deciderSpec.ScalingMetric, attrs)
 	// We always start in the panic mode, if the deployment is scaled up over 1 pod.
 	// If the scale is 0 or 1, normal Autoscaler behavior is fine.
 	// When Autoscaler restarts we lose metric history, which causes us to
@@ -137,6 +137,10 @@ func (a *autoscaler) Update(deciderSpec *DeciderSpec) {
 	a.deciderSpec = deciderSpec
 }

+func (a *autoscaler) OnDelete() {
+	a.metrics.OnDelete()
+}
+
 // Scale calculates the desired scale based on current statistics given the current time.
 // desiredPodCount is the calculated pod count the autoscaler would like to set.
 // validScale signifies whether the desiredPodCount should be applied or not.
@@ -293,24 +297,13 @@ func (a *autoscaler) Scale(logger *zap.SugaredLogger, now time.Time) ScaleResult
 			observedPanicValue, spec.TargetBurstCapacity, excessBCF))
 	}

-	switch spec.ScalingMetric {
-	case autoscaling.RPS:
-		a.metrics.RecordRPS(
-			excessBCF,
-			int64(desiredPodCount),
-			observedStableValue,
-			observedPanicValue,
-			spec.TargetValue,
-		)
-	default:
-		a.metrics.RecordConcurrency(
-			excessBCF,
-			int64(desiredPodCount),
-			observedStableValue,
-			observedPanicValue,
-			spec.TargetValue,
-		)
-	}
+	a.metrics.Record(
+		excessBCF,
+		int64(desiredPodCount),
+		observedStableValue,
+		observedPanicValue,
+		spec.TargetValue,
+	)

 	return ScaleResult{
 		DesiredPodCount: desiredPodCount,
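
The new `OnDelete` hook only matters if something calls it when a revision disappears. A hypothetical, simplified sketch of such a call site follows; the real wiring lives in the KPA multiscaler, which is outside this diff, and `scalerPool`, `remove`, and the map field are invented names.

```go
package example

import "sync"

// UniScaler is the narrow interface the pool needs; OnDelete mirrors
// the method this commit adds to *autoscaler.
type UniScaler interface {
	OnDelete()
}

// scalerPool is a hypothetical stand-in for the component that owns
// one autoscaler per revision.
type scalerPool struct {
	mu      sync.Mutex
	scalers map[string]UniScaler
}

// remove tears down the scaler for a deleted revision, letting it
// unregister its async metric callback before being dropped.
func (p *scalerPool) remove(key string) {
	p.mu.Lock()
	defer p.mu.Unlock()
	if s, ok := p.scalers[key]; ok {
		s.OnDelete()
		delete(p.scalers, key)
	}
}
```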

pkg/autoscaler/scaling/autoscaler_test.go

Lines changed: 97 additions & 29 deletions

@@ -217,6 +217,7 @@ func expectedEBC(totCap, targetBC, recordedConcurrency, numPods float64) int32 {
 }

 func TestAutoscalerStartMetrics(t *testing.T) {
+	expectedAttrs := attribute.NewSet(attribute.String("foo", "bar"))
 	metrics := &metricClient{StableConcurrency: 50.0, PanicConcurrency: 50.0}

 	_, _, reader := newTestAutoscalerWithScalingMetric(
@@ -227,39 +228,16 @@ func TestAutoscalerStartMetrics(t *testing.T) {

 	metricstest.AssertMetrics(
 		t, reader,
-		metricstest.MetricsEqual(scopeName, panicMetric(true)),
-	)
-}
-
-func TestAutoscalerMetrics(t *testing.T) {
-	metrics := &metricClient{StableConcurrency: 50.0, PanicConcurrency: 50.0}
-	a, _, reader := newTestAutoscaler(10, 100, metrics)
-	expectedAttrs := attribute.NewSet(attribute.String("foo", "bar"))
-
-	// Non-panic created autoscaler.
-	metricstest.AssertMetrics(
-		t, reader,
-		metricstest.MetricsEqual(scopeName, panicMetric(false)),
-	)
-
-	ebc := expectedEBC(10, 100, 50, 1)
-	expectScale(t, a, time.Now(), ScaleResult{5, ebc, true})
-	spec := a.currentSpec()
-
-	metricstest.AssertMetrics(
-		t, reader,
-		metricstest.MetricsEqual(
-			scopeName,
+		metricstest.MetricsEqual(scopeName,
 			panicMetric(true),
-
 			metricdata.Metrics{
 				Name:        "kn.revision.pods.desired",
 				Description: "Number of pods the autoscaler wants to allocate",
 				Unit:        "{item}",
 				Data: metricdata.Gauge[int64]{
 					DataPoints: []metricdata.DataPoint[int64]{{
 						Attributes: expectedAttrs,
-						Value:      5,
+						Value:      0,
 					}},
 				},
 			},
@@ -271,7 +249,7 @@ func TestAutoscalerMetrics(t *testing.T) {
 				Data: metricdata.Gauge[float64]{
 					DataPoints: []metricdata.DataPoint[float64]{{
 						Attributes: expectedAttrs,
-						Value:      float64(ebc),
+						Value:      float64(0),
 					}},
 				},
 			},
@@ -283,7 +261,7 @@ func TestAutoscalerMetrics(t *testing.T) {
 				Data: metricdata.Gauge[float64]{
 					DataPoints: []metricdata.DataPoint[float64]{{
 						Attributes: expectedAttrs,
-						Value:      50,
+						Value:      0,
 					}},
 				},
 			},
@@ -295,7 +273,7 @@ func TestAutoscalerMetrics(t *testing.T) {
 				Data: metricdata.Gauge[float64]{
 					DataPoints: []metricdata.DataPoint[float64]{{
 						Attributes: expectedAttrs,
-						Value:      50,
+						Value:      0,
 					}},
 				},
 			},
@@ -307,14 +285,39 @@ func TestAutoscalerMetrics(t *testing.T) {
 				Data: metricdata.Gauge[float64]{
 					DataPoints: []metricdata.DataPoint[float64]{{
 						Attributes: expectedAttrs,
-						Value:      float64(spec.TargetValue),
+						Value:      float64(0),
 					}},
 				},
 			},
 		),
 	)
 }

+func TestAutoscalerMetrics(t *testing.T) {
+	metrics := &metricClient{StableConcurrency: 50.0, PanicConcurrency: 50.0}
+	a, _, reader := newTestAutoscaler(10, 100, metrics)
+	expectedAttrs := attribute.NewSet(attribute.String("foo", "bar"))
+
+	expectedMetrics := concurrencyDataPoints(expectedAttrs, false, 0, 0, 0, 0, 0)
+
+	// Non-panic created autoscaler.
+	metricstest.AssertMetrics(
+		t, reader,
+		metricstest.MetricsEqual(scopeName, expectedMetrics...),
+	)
+
+	ebc := expectedEBC(10, 100, 50, 1)
+	expectScale(t, a, time.Now(), ScaleResult{5, ebc, true})
+	spec := a.currentSpec()
+
+	expectedMetrics = concurrencyDataPoints(expectedAttrs, true, 5, float64(ebc), 50, 50, spec.TargetValue)
+
+	metricstest.AssertMetrics(
+		t, reader,
+		metricstest.MetricsEqual(scopeName, expectedMetrics...),
+	)
+}
+
 func TestAutoscalerMetricsWithRPS(t *testing.T) {
 	expectedAttrs := attribute.NewSet(attribute.String("foo", "bar"))
 	metrics := &metricClient{PanicRPS: 99.0, StableRPS: 100}
@@ -936,3 +939,68 @@ func panicMetric(panic bool) metricdata.Metrics {
 		},
 	}
 }
+
+func concurrencyDataPoints(attrs attribute.Set, isPanic bool, pods int64, ebc, stable, panic, target float64) []metricdata.Metrics {
+	return []metricdata.Metrics{
+		panicMetric(isPanic),
+		{
+			Name:        "kn.revision.pods.desired",
+			Description: "Number of pods the autoscaler wants to allocate",
+			Unit:        "{item}",
+			Data: metricdata.Gauge[int64]{
+				DataPoints: []metricdata.DataPoint[int64]{{
+					Attributes: attrs,
+					Value:      pods,
+				}},
+			},
+		},
+
+		{
+			Name:        "kn.revision.capacity.excess",
+			Description: "Excess burst capacity observed over the stable window",
+			Unit:        "{concurrency}",
+			Data: metricdata.Gauge[float64]{
+				DataPoints: []metricdata.DataPoint[float64]{{
+					Attributes: attrs,
+					Value:      ebc,
+				}},
+			},
+		},
+
+		{
+			Name:        "kn.revision.concurrency.stable",
+			Description: "Average of request count per observed pod over the stable window",
+			Unit:        "{concurrency}",
+			Data: metricdata.Gauge[float64]{
+				DataPoints: []metricdata.DataPoint[float64]{{
+					Attributes: attrs,
+					Value:      stable,
+				}},
+			},
+		},
+
+		{
+			Name:        "kn.revision.concurrency.panic",
+			Description: "Average of request count per observed pod over the panic window",
+			Unit:        "{concurrency}",
+			Data: metricdata.Gauge[float64]{
+				DataPoints: []metricdata.DataPoint[float64]{{
+					Attributes: attrs,
+					Value:      panic,
+				}},
+			},
+		},
+
+		{
+			Name:        "kn.revision.concurrency.target",
+			Description: "The desired concurrent requests for each pod",
+			Unit:        "{concurrency}",
+			Data: metricdata.Gauge[float64]{
+				DataPoints: []metricdata.DataPoint[float64]{{
+					Attributes: attrs,
+					Value:      target,
+				}},
+			},
+		},
+	}
+}
