Merge pull request #7886 from plkokanov/fix/empty-histogram-after-load-from-checkpoint

k8s-ci-robot · web-flow · commit 94ae175e94b3 · 2025-03-26T03:04:36.000-07:00
Fixes histograms becoming empty after being loaded from checkpoints
diff --git a/vertical-pod-autoscaler/pkg/recommender/util/histogram.go b/vertical-pod-autoscaler/pkg/recommender/util/histogram.go
@@ -278,6 +278,15 @@ func (h *histogram) LoadFromCheckpoint(checkpoint *vpa_types.HistogramCheckpoint
 	}
 	h.totalWeight += checkpoint.TotalWeight
 
+	// In some cases where the weight of the max bucket is close (equal or less) to `MaxCheckpointWeight` times epsilon
+	// and there are buckets with weights slightly higher or equal to epsilon, saving the histogram to a checkpoint and
+	// then loading it will cause the weights that are close to epsilon to become smaller than epsilon due to rounding errors
+	// and differences between the load and save algorithm. If one of those weights is the min weight, this will cause the
+	// histogram to incorrectly become "empty" and the `Percentile(...)` function to always return 0.
+	// To cover for such cases, the min and max buckets are updated here, so that those less than epsilon are dropped.
+	// For more information check https://github.com/kubernetes/autoscaler/issues/7726
+	h.updateMinAndMaxBucket()
+
 	return nil
 }
 
diff --git a/vertical-pod-autoscaler/pkg/recommender/util/histogram_test.go b/vertical-pod-autoscaler/pkg/recommender/util/histogram_test.go
@@ -265,6 +265,52 @@ func TestHistogramLoadFromCheckpointReturnsErrorOnNilInput(t *testing.T) {
 	assert.Error(t, err)
 }
 
+func TestHistogramIsNotEmptyAfterSavingAndLoadingCheckpointsWithBoundaryValues(t *testing.T) {
+	// There is a specific scenario in which the weights of the minimum and maximum histogram buckets,
+	// when saved to a VPACheckpoint and subsequently loaded, result in diminished weights for the minimum buckets.
+
+	// This issue arises due to rounding errors when converting float weights to integers in the VPACheckpoint.
+	// For instance, consider the weights:
+	// `w1` which approximates but is slightly larger than or equal to `epsilon`,
+	// `w2` which approximates but is slightly smaller than or equal to (`MaxCheckpointWeight` * `epsilon`) - `epsilon`.
+
+	// When these weights are stored in a VPACheckpoint, they are translated to integers:
+	// `w1` rounds to `1` (`wi1`),
+	// `w2` rounds to `MaxCheckpointWeight` (`wi2`).
+
+	// Upon loading from the VPACheckpoint, the histogram reconstructs its weights using a calculated ratio,
+	// aimed at reverting integer weights back to float values. This ratio is derived from:
+	// (`w1` + `w2`) / (`wi1` + `wi2`)
+	// Reference: https://github.com/kubernetes/autoscaler/blob/aa1d413ea3bf319b56c7b2e65ade1a028e149439/vertical-pod-autoscaler/pkg/recommender/util/histogram.go#L256-L269
+
+	// Given the maximum potential values for `w1`, `w2`, `wi1` and `wi2` we arrive at:
+	// (`epsilon` + `MaxCheckpointWeight` * `epsilon` - `epsilon`) / (1 + `MaxCheckpointWeight`) = `epsilon` * `MaxCheckpointWeight` / (1 + `MaxCheckpointWeight`)
+
+	// Consequently, the maximum value for this ratio is less than `epsilon`, implying that when `w1`,
+	// initially scaled to `1`, is adjusted by this ratio, its recalculated weight falls short of `epsilon`.
+	// When the `minBucket`'s weight is less than `epsilon`, the `histogram.IsEmpty()` returns true.
+	// Reference: https://github.com/kubernetes/autoscaler/blob/aa1d413ea3bf319b56c7b2e65ade1a028e149439/vertical-pod-autoscaler/pkg/recommender/util/histogram.go#L181-L183
+	// Consequently, the `histogram.Percentile(...)` function will always return 0.
+	// Reference: https://github.com/kubernetes/autoscaler/blob/aa1d413ea3bf319b56c7b2e65ade1a028e149439/vertical-pod-autoscaler/pkg/recommender/util/histogram.go#L159-L162
+	// The same behavior can be observed when there are more than two weights.
+
+	// This test ensures that in such cases the histogram does not become empty.
+	// For more information check https://github.com/kubernetes/autoscaler/issues/7726
+
+	histogram := NewHistogram(testHistogramOptions)
+	histogram.AddSample(1, weightEpsilon, anyTime)
+	histogram.AddSample(2, (float64(MaxCheckpointWeight)*weightEpsilon - weightEpsilon), anyTime)
+	assert.False(t, histogram.IsEmpty())
+
+	checkpoint, err := histogram.SaveToChekpoint()
+	assert.NoError(t, err)
+
+	newHistogram := NewHistogram(testHistogramOptions)
+	err = newHistogram.LoadFromCheckpoint(checkpoint)
+	assert.NoError(t, err)
+	assert.False(t, newHistogram.IsEmpty())
+}
+
 func areUnique(values ...interface{}) bool {
 	dict := make(map[interface{}]bool)
 	for i, v := range values {

Original file line number	Diff line number	Diff line change
`@@ -278,6 +278,15 @@ func (h *histogram) LoadFromCheckpoint(checkpoint *vpa_types.HistogramCheckpoint`
`278`	`278`	`}`
`279`	`279`	`h.totalWeight += checkpoint.TotalWeight`
`280`	`280`
	`281`	``+ // In some cases where the weight of the max bucket is close (equal or less) to `MaxCheckpointWeight` times epsilon``
	`282`	`+ // and there are buckets with weights slightly higher or equal to epsilon, saving the histogram to a checkpoint and`
	`283`	`+ // then loading it will cause the weights that are close to epsilon to become smaller than epsilon due to rounding errors`
	`284`	`+ // and differences between the load and save algorithm. If one of those weights is the min weight, this will cause the`
	`285`	``+ // histogram to incorrectly become "empty" and the `Percentile(...)` function to always return 0.``
	`286`	`+ // To cover for such cases, the min and max buckets are updated here, so that those less than epsilon are dropped.`
	`287`	`+ // For more information check https://github.com/kubernetes/autoscaler/issues/7726`
	`288`	`+ h.updateMinAndMaxBucket()`
	`289`	`+`
`281`	`290`	`return nil`
`282`	`291`	`}`
`283`	`292`