Skip to content

Commit 7d9c533

Browse files
committed
Merge branch 'master' of github.com:kgolab/autoscaler into vpa-updater-metrics
2 parents 64d7c6f + 20c2492 commit 7d9c533

File tree

10 files changed

+114
-48
lines changed

10 files changed

+114
-48
lines changed

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_controller_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -987,7 +987,7 @@ func TestControllerMachineSetNodeNamesUsingProviderID(t *testing.T) {
987987
})
988988

989989
for i := range testConfig.nodes {
990-
if nodeNames[i].Id != string(normalizedProviderString(testConfig.nodes[i].Spec.ProviderID)) {
990+
if nodeNames[i].Id != testConfig.nodes[i].Spec.ProviderID {
991991
t.Fatalf("expected %q, got %q", testConfig.nodes[i].Spec.ProviderID, nodeNames[i].Id)
992992
}
993993
}
@@ -1035,7 +1035,7 @@ func TestControllerMachineSetNodeNamesUsingStatusNodeRefName(t *testing.T) {
10351035
})
10361036

10371037
for i := range testConfig.nodes {
1038-
if nodeNames[i].Id != string(normalizedProviderString(testConfig.nodes[i].Spec.ProviderID)) {
1038+
if nodeNames[i].Id != testConfig.nodes[i].Spec.ProviderID {
10391039
t.Fatalf("expected %q, got %q", testConfig.nodes[i].Spec.ProviderID, nodeNames[i].Id)
10401040
}
10411041
}

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,10 +195,14 @@ func (ng *nodegroup) Nodes() ([]cloudprovider.Instance, error) {
195195
return nil, err
196196
}
197197

198+
// Nodes do not have normalized IDs, so do not normalize the ID here.
199+
// The IDs returned here are used to check if a node is registered or not and
200+
// must match the ID on the Node object itself.
201+
// https://github.com/kubernetes/autoscaler/blob/a973259f1852303ba38a3a61eeee8489cf4e1b13/cluster-autoscaler/clusterstate/clusterstate.go#L967-L985
198202
instances := make([]cloudprovider.Instance, len(nodes))
199203
for i := range nodes {
200204
instances[i] = cloudprovider.Instance{
201-
Id: string(normalizedProviderString(nodes[i])),
205+
Id: nodes[i],
202206
}
203207
}
204208

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -659,7 +659,7 @@ func TestNodeGroupDeleteNodes(t *testing.T) {
659659
})
660660

661661
for i := 0; i < len(nodeNames); i++ {
662-
if nodeNames[i].Id != string(normalizedProviderString(testConfig.nodes[i].Spec.ProviderID)) {
662+
if nodeNames[i].Id != testConfig.nodes[i].Spec.ProviderID {
663663
t.Fatalf("expected %q, got %q", testConfig.nodes[i].Spec.ProviderID, nodeNames[i].Id)
664664
}
665665
}
@@ -844,7 +844,7 @@ func TestNodeGroupDeleteNodesTwice(t *testing.T) {
844844
})
845845

846846
for i := 0; i < len(nodeNames); i++ {
847-
if nodeNames[i].Id != string(normalizedProviderString(testConfig.nodes[i].Spec.ProviderID)) {
847+
if nodeNames[i].Id != testConfig.nodes[i].Spec.ProviderID {
848848
t.Fatalf("expected %q, got %q", testConfig.nodes[i].Spec.ProviderID, nodeNames[i].Id)
849849
}
850850
}
@@ -998,7 +998,7 @@ func TestNodeGroupWithFailedMachine(t *testing.T) {
998998
nodeIndex = i
999999
}
10001000

1001-
if nodeNames[i].Id != string(normalizedProviderString(testConfig.nodes[nodeIndex].Spec.ProviderID)) {
1001+
if nodeNames[i].Id != testConfig.nodes[nodeIndex].Spec.ProviderID {
10021002
t.Fatalf("expected %q, got %q", testConfig.nodes[nodeIndex].Spec.ProviderID, nodeNames[i].Id)
10031003
}
10041004
}

cluster-autoscaler/core/scale_up.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,7 @@ func ScaleUp(context *context.AutoscalingContext, processors *ca_processors.Auto
464464
if context.MaxNodesTotal > 0 && len(nodes)+newNodes+len(upcomingNodes) > context.MaxNodesTotal {
465465
klog.V(1).Infof("Capping size to max cluster total size (%d)", context.MaxNodesTotal)
466466
newNodes = context.MaxNodesTotal - len(nodes) - len(upcomingNodes)
467+
context.LogRecorder.Eventf(apiv1.EventTypeWarning, "MaxNodesTotalReached", "Max total nodes in cluster reached: %v", context.MaxNodesTotal)
467468
if newNodes < 1 {
468469
return &status.ScaleUpStatus{Result: status.ScaleUpError}, errors.NewAutoscalerError(
469470
errors.TransientError,

vertical-pod-autoscaler/deploy/vpa-rbac.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,13 @@ kind: ClusterRole
156156
metadata:
157157
name: system:vpa-target-reader
158158
rules:
159+
- apiGroups:
160+
- '*'
161+
resources:
162+
- '*/scale'
163+
verbs:
164+
- get
165+
- watch
159166
- apiGroups:
160167
- ""
161168
resources:

vertical-pod-autoscaler/pkg/recommender/model/cluster.go

Lines changed: 42 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,8 @@ type ClusterState struct {
4646
// time we've noticed the recommendation missing or last time we logged
4747
// a warning about it.
4848
EmptyVPAs map[VpaID]time.Time
49-
// VpasWithMatchingPods contains information if there exist live pods that
50-
// this VPAs selector matches.
51-
VpasWithMatchingPods map[VpaID]bool
49+
// VpaPodCount contains number of live Pods matching a given VPA object.
50+
VpaPodCount map[VpaID]int
5251
// Observed VPAs. Used to check if there are updates needed.
5352
ObservedVpas []*vpa_types.VerticalPodAutoscaler
5453

@@ -97,12 +96,12 @@ type PodState struct {
9796
// NewClusterState returns a new ClusterState with no pods.
9897
func NewClusterState() *ClusterState {
9998
return &ClusterState{
100-
Pods: make(map[PodID]*PodState),
101-
Vpas: make(map[VpaID]*Vpa),
102-
EmptyVPAs: make(map[VpaID]time.Time),
103-
VpasWithMatchingPods: make(map[VpaID]bool),
104-
aggregateStateMap: make(aggregateContainerStatesMap),
105-
labelSetMap: make(labelSetMap),
99+
Pods: make(map[PodID]*PodState),
100+
Vpas: make(map[VpaID]*Vpa),
101+
EmptyVPAs: make(map[VpaID]time.Time),
102+
VpaPodCount: make(map[VpaID]int),
103+
aggregateStateMap: make(aggregateContainerStatesMap),
104+
labelSetMap: make(labelSetMap),
106105
}
107106
}
108107

@@ -124,18 +123,44 @@ func (cluster *ClusterState) AddOrUpdatePod(podID PodID, newLabels labels.Set, p
124123
pod = newPod(podID)
125124
cluster.Pods[podID] = pod
126125
}
126+
127127
newlabelSetKey := cluster.getLabelSetKey(newLabels)
128+
if podExists && pod.labelSetKey != newlabelSetKey {
129+
// This Pod is already counted in the old VPA, remove the link.
130+
cluster.removePodFromItsVpa(pod)
131+
}
128132
if !podExists || pod.labelSetKey != newlabelSetKey {
129133
pod.labelSetKey = newlabelSetKey
130134
// Set the links between the containers and aggregations based on the current pod labels.
131135
for containerName, container := range pod.Containers {
132136
containerID := ContainerID{PodID: podID, ContainerName: containerName}
133137
container.aggregator = cluster.findOrCreateAggregateContainerState(containerID)
134138
}
139+
140+
cluster.addPodToItsVpa(pod)
135141
}
136142
pod.Phase = phase
137143
}
138144

145+
// addPodToItsVpa increases the count of Pods associated with a VPA object.
146+
// Does a scan similar to findOrCreateAggregateContainerState so could be optimized if needed.
147+
func (cluster *ClusterState) addPodToItsVpa(pod *PodState) {
148+
for _, vpa := range cluster.Vpas {
149+
if vpa_utils.PodLabelsMatchVPA(pod.ID.Namespace, cluster.labelSetMap[pod.labelSetKey], vpa.ID.Namespace, vpa.PodSelector) {
150+
cluster.VpaPodCount[vpa.ID]++
151+
}
152+
}
153+
}
154+
155+
// removePodFromItsVpa decreases the count of Pods associated with a VPA object.
156+
func (cluster *ClusterState) removePodFromItsVpa(pod *PodState) {
157+
for _, vpa := range cluster.Vpas {
158+
if vpa_utils.PodLabelsMatchVPA(pod.ID.Namespace, cluster.labelSetMap[pod.labelSetKey], vpa.ID.Namespace, vpa.PodSelector) {
159+
cluster.VpaPodCount[vpa.ID]--
160+
}
161+
}
162+
}
163+
139164
// GetContainer returns the ContainerState object for a given ContainerID or
140165
// null if it's not present in the model.
141166
func (cluster *ClusterState) GetContainer(containerID ContainerID) *ContainerState {
@@ -151,6 +176,10 @@ func (cluster *ClusterState) GetContainer(containerID ContainerID) *ContainerSta
151176

152177
// DeletePod removes an existing pod from the cluster.
153178
func (cluster *ClusterState) DeletePod(podID PodID) {
179+
pod, found := cluster.Pods[podID]
180+
if found {
181+
cluster.removePodFromItsVpa(pod)
182+
}
154183
delete(cluster.Pods, podID)
155184
}
156185

@@ -237,10 +266,9 @@ func (cluster *ClusterState) AddOrUpdateVpa(apiObject *vpa_types.VerticalPodAuto
237266
vpa = NewVpa(vpaID, selector, apiObject.CreationTimestamp.Time)
238267
cluster.Vpas[vpaID] = vpa
239268
for aggregationKey, aggregation := range cluster.aggregateStateMap {
240-
if vpa.UseAggregationIfMatching(aggregationKey, aggregation) {
241-
cluster.VpasWithMatchingPods[vpa.ID] = true
242-
}
269+
vpa.UseAggregationIfMatching(aggregationKey, aggregation)
243270
}
271+
cluster.VpaPodCount[vpaID] = len(cluster.GetMatchingPods(vpa))
244272
}
245273
vpa.TargetRef = apiObject.Spec.TargetRef
246274
vpa.Annotations = annotationsMap
@@ -262,7 +290,7 @@ func (cluster *ClusterState) DeleteVpa(vpaID VpaID) error {
262290
}
263291
delete(cluster.Vpas, vpaID)
264292
delete(cluster.EmptyVPAs, vpaID)
265-
delete(cluster.VpasWithMatchingPods, vpaID)
293+
delete(cluster.VpaPodCount, vpaID)
266294
return nil
267295
}
268296

@@ -313,9 +341,7 @@ func (cluster *ClusterState) findOrCreateAggregateContainerState(containerID Con
313341
cluster.aggregateStateMap[aggregateStateKey] = aggregateContainerState
314342
// Link the new aggregation to the existing VPAs.
315343
for _, vpa := range cluster.Vpas {
316-
if vpa.UseAggregationIfMatching(aggregateStateKey, aggregateContainerState) {
317-
cluster.VpasWithMatchingPods[vpa.ID] = true
318-
}
344+
vpa.UseAggregationIfMatching(aggregateStateKey, aggregateContainerState)
319345
}
320346
}
321347
return aggregateContainerState

vertical-pod-autoscaler/pkg/recommender/model/cluster_test.go

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ import (
3232

3333
var (
3434
testPodID = PodID{"namespace-1", "pod-1"}
35+
testPodID3 = PodID{"namespace-1", "pod-3"}
36+
testPodID4 = PodID{"namespace-1", "pod-4"}
3537
testContainerID = ContainerID{testPodID, "container-1"}
3638
testVpaID = VpaID{"namespace-1", "vpa-1"}
3739
testAnnotations = vpaAnnotationsMap{"key-1": "value-1"}
@@ -701,13 +703,13 @@ func TestVPAWithMatchingPods(t *testing.T) {
701703
name string
702704
vpaSelector string
703705
pods []podDesc
704-
expectedMatch bool
706+
expectedMatch int
705707
}{
706708
{
707709
name: "No pods",
708710
vpaSelector: testSelectorStr,
709711
pods: []podDesc{},
710-
expectedMatch: false,
712+
expectedMatch: 0,
711713
},
712714
{
713715
name: "VPA with matching pod",
@@ -719,7 +721,7 @@ func TestVPAWithMatchingPods(t *testing.T) {
719721
apiv1.PodRunning,
720722
},
721723
},
722-
expectedMatch: true,
724+
expectedMatch: 1,
723725
},
724726
{
725727
name: "No matching pod",
@@ -731,19 +733,55 @@ func TestVPAWithMatchingPods(t *testing.T) {
731733
apiv1.PodRunning,
732734
},
733735
},
734-
expectedMatch: false,
736+
expectedMatch: 0,
737+
},
738+
{
739+
name: "VPA with 2 matching pods, 1 not matching",
740+
vpaSelector: testSelectorStr,
741+
pods: []podDesc{
742+
{
743+
testPodID,
744+
emptyLabels, // does not match VPA
745+
apiv1.PodRunning,
746+
},
747+
{
748+
testPodID3,
749+
testLabels,
750+
apiv1.PodRunning,
751+
},
752+
{
753+
testPodID4,
754+
testLabels,
755+
apiv1.PodRunning,
756+
},
757+
},
758+
expectedMatch: 2,
735759
},
736760
}
761+
// Run with adding VPA first
737762
for _, tc := range cases {
738-
t.Run(tc.name, func(t *testing.T) {
763+
t.Run(tc.name+", VPA first", func(t *testing.T) {
739764
cluster := NewClusterState()
740765
vpa := addVpa(cluster, testVpaID, testAnnotations, tc.vpaSelector)
741766
for _, podDesc := range tc.pods {
742767
cluster.AddOrUpdatePod(podDesc.id, podDesc.labels, podDesc.phase)
743768
containerID := ContainerID{testPodID, "foo"}
744769
assert.NoError(t, cluster.AddOrUpdateContainer(containerID, testRequest))
745770
}
746-
assert.Equal(t, tc.expectedMatch, cluster.VpasWithMatchingPods[vpa.ID])
771+
assert.Equal(t, tc.expectedMatch, cluster.VpaPodCount[vpa.ID])
772+
})
773+
}
774+
// Run with adding Pods first
775+
for _, tc := range cases {
776+
t.Run(tc.name+", Pods first", func(t *testing.T) {
777+
cluster := NewClusterState()
778+
for _, podDesc := range tc.pods {
779+
cluster.AddOrUpdatePod(podDesc.id, podDesc.labels, podDesc.phase)
780+
containerID := ContainerID{testPodID, "foo"}
781+
assert.NoError(t, cluster.AddOrUpdateContainer(containerID, testRequest))
782+
}
783+
vpa := addVpa(cluster, testVpaID, testAnnotations, tc.vpaSelector)
784+
assert.Equal(t, tc.expectedMatch, cluster.VpaPodCount[vpa.ID])
747785
})
748786
}
749787
}

vertical-pod-autoscaler/pkg/recommender/model/vpa.go

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -126,20 +126,18 @@ func NewVpa(id VpaID, selector labels.Selector, created time.Time) *Vpa {
126126
}
127127

128128
// UseAggregationIfMatching checks if the given aggregation matches (contributes to) this VPA
129-
// and adds it to the set of VPA's aggregations if that is the case. Returns true
130-
// if the aggregation matches VPA.
131-
func (vpa *Vpa) UseAggregationIfMatching(aggregationKey AggregateStateKey, aggregation *AggregateContainerState) bool {
129+
// and adds it to the set of VPA's aggregations if that is the case.
130+
func (vpa *Vpa) UseAggregationIfMatching(aggregationKey AggregateStateKey, aggregation *AggregateContainerState) {
132131
if vpa.UsesAggregation(aggregationKey) {
133-
return true
132+
// Already linked, we can return quickly.
133+
return
134134
}
135135
if vpa.matchesAggregation(aggregationKey) {
136136
vpa.aggregateContainerStates[aggregationKey] = aggregation
137137
aggregation.IsUnderVPA = true
138138
aggregation.UpdateMode = vpa.UpdateMode
139139
aggregation.UpdateFromPolicy(vpa_api_util.GetContainerResourcePolicy(aggregationKey.ContainerName(), vpa.ResourcePolicy))
140-
return true
141140
}
142-
return false
143141
}
144142

145143
// UpdateRecommendation updates the recommended resources in the VPA and its

0 commit comments

Comments
 (0)