Skip to content

Commit 602e432

Browse files
authored
Merge pull request #9269 from norbertcyran/buffers-limits-hijack-condition
Do not override condition in buffers limits translator
2 parents dcb0a67 + 435aefa commit 602e432

File tree

3 files changed

+73
-22
lines changed

3 files changed

+73
-22
lines changed

cluster-autoscaler/capacitybuffer/testutil/testutil.go

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,10 +96,15 @@ func GetBufferStatus(podTempRef *v1.LocalObjectRef, replicas *int32, podTemplate
9696

9797
// GetConditionReady returns a list of conditions with a condition ready and empty message, should be used for testing purposes only
9898
func GetConditionReady() []metav1.Condition {
99+
return GetConditionReadyWithMessage("")
100+
}
101+
102+
// GetConditionReadyWithMessage returns a list of conditions with a condition ready and the specified message
103+
func GetConditionReadyWithMessage(message string) []metav1.Condition {
99104
readyCondition := metav1.Condition{
100105
Type: capacitybuffer.ReadyForProvisioningCondition,
101106
Status: metav1.ConditionTrue,
102-
Message: "",
107+
Message: message,
103108
Reason: capacitybuffer.AttributesSetSuccessfullyReason,
104109
LastTransitionTime: metav1.Time{},
105110
}
@@ -108,10 +113,15 @@ func GetConditionReady() []metav1.Condition {
108113

109114
// GetConditionNotReady returns a list of conditions with a condition not ready and empty message, should be used for testing purposes only
110115
func GetConditionNotReady() []metav1.Condition {
116+
return GetConditionNotReadyWithMessage("")
117+
}
118+
119+
// GetConditionNotReadyWithMessage returns a list of condition with a condition not ready and specified message.
120+
func GetConditionNotReadyWithMessage(message string) []metav1.Condition {
111121
notReadyCondition := metav1.Condition{
112122
Type: capacitybuffer.ReadyForProvisioningCondition,
113123
Status: metav1.ConditionFalse,
114-
Message: "",
124+
Message: message,
115125
Reason: "error",
116126
LastTransitionTime: metav1.Time{},
117127
}
@@ -155,6 +165,13 @@ func WithReplicas(replicas int32) BufferOption {
155165
}
156166
}
157167

168+
// WithLimits sets the Spec.Limits
169+
func WithLimits(limits v1.ResourceList) BufferOption {
170+
return func(b *v1.CapacityBuffer) {
171+
b.Spec.Limits = &limits
172+
}
173+
}
174+
158175
// WithStatusPodTemplateRef sets the Status.PodTemplateRef
159176
func WithStatusPodTemplateRef(name string) BufferOption {
160177
return func(b *v1.CapacityBuffer) {

cluster-autoscaler/capacitybuffer/translators/resource_limits_translator.go

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,13 @@ limitations under the License.
1717
package translator
1818

1919
import (
20+
"errors"
2021
"fmt"
2122

2223
corev1 "k8s.io/api/core/v1"
24+
"k8s.io/apimachinery/pkg/api/meta"
2325
api_v1 "k8s.io/autoscaler/cluster-autoscaler/apis/capacitybuffer/autoscaling.x-k8s.io/v1beta1"
26+
"k8s.io/autoscaler/cluster-autoscaler/capacitybuffer"
2427
cbclient "k8s.io/autoscaler/cluster-autoscaler/capacitybuffer/client"
2528
"k8s.io/autoscaler/cluster-autoscaler/capacitybuffer/common"
2629
)
@@ -39,36 +42,35 @@ func NewResourceLimitsTranslator(client *cbclient.CapacityBufferClient) *resourc
3942

4043
// Translate translates buffers processors into pod capacity.
4144
func (t *resourceLimitsTranslator) Translate(buffers []*api_v1.CapacityBuffer) []error {
42-
errors := []error{}
45+
var errs []error
4346
for _, buffer := range buffers {
4447
if isResourcesLimitsDefinedInBuffer(buffer) {
45-
if buffer.Status.PodTemplateRef == nil {
46-
err := fmt.Errorf("Can't get pod template, PodTemplateRef is nil")
47-
common.SetBufferAsNotReadyForProvisioning(buffer, nil, nil, nil, buffer.Spec.ProvisioningStrategy, err)
48-
errors = append(errors, err)
48+
if buffer.Status.PodTemplateRef == nil || buffer.Status.PodTemplateGeneration == nil || meta.IsStatusConditionFalse(buffer.Status.Conditions, capacitybuffer.ReadyForProvisioningCondition) {
49+
// that means that previous translators failed to resolve the pod template, we don't want to override
50+
// the condition here in order to keep the error message from previous translators.
4951
continue
5052
}
5153
podTemplate, err := t.client.GetPodTemplate(buffer.Namespace, buffer.Status.PodTemplateRef.Name)
5254
if err != nil {
5355
err = fmt.Errorf("Couldn't get pod template, error: %v", err)
5456
common.SetBufferAsNotReadyForProvisioning(buffer, nil, nil, nil, buffer.Spec.ProvisioningStrategy, err)
55-
errors = append(errors, err)
57+
errs = append(errs, err)
5658
continue
5759
}
5860
numberOfPods := limitNumberOfPodsForResource(podTemplate, *buffer.Spec.Limits)
5961
if numberOfPods == nil {
60-
err := fmt.Errorf("Couldn't calculate number of pods for buffer %v based on provided resource limits %v", buffer.Name, *buffer.Spec.Limits)
62+
err := errors.New("couldn't calculate number of pods for buffer based on provided resource limits. Check if the pod template requests at least one limited resource")
6163
common.SetBufferAsNotReadyForProvisioning(buffer, nil, nil, nil, buffer.Spec.ProvisioningStrategy, err)
62-
errors = append(errors, err)
64+
// this error is expected when the buffer is misconfigured, so we don't want it to trigger requeue
6365
continue
6466
}
6567
if buffer.Status.Replicas != nil {
6668
numberOfPods = pointerToInt32(min(*buffer.Status.Replicas, *numberOfPods))
6769
}
68-
common.SetBufferAsReadyForProvisioning(buffer, buffer.Status.PodTemplateRef, &podTemplate.Generation, numberOfPods, buffer.Spec.ProvisioningStrategy)
70+
common.SetBufferAsReadyForProvisioning(buffer, buffer.Status.PodTemplateRef, buffer.Status.PodTemplateGeneration, numberOfPods, buffer.Spec.ProvisioningStrategy)
6971
}
7072
}
71-
return errors
73+
return errs
7274
}
7375

7476
func limitNumberOfPodsForResource(podTemplate *corev1.PodTemplate, limits api_v1.ResourceList) *int32 {

cluster-autoscaler/capacitybuffer/translators/resource_limits_translator_test.go

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,17 @@ package translator
1919
import (
2020
"testing"
2121

22+
"github.com/google/go-cmp/cmp"
23+
"github.com/google/go-cmp/cmp/cmpopts"
24+
"github.com/stretchr/testify/assert"
2225
corev1 "k8s.io/api/core/v1"
2326
"k8s.io/apimachinery/pkg/api/resource"
2427
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
25-
fakeClient "k8s.io/client-go/kubernetes/fake"
26-
27-
"github.com/stretchr/testify/assert"
2828
v1 "k8s.io/autoscaler/cluster-autoscaler/apis/capacitybuffer/autoscaling.x-k8s.io/v1beta1"
2929
cbclient "k8s.io/autoscaler/cluster-autoscaler/capacitybuffer/client"
3030
"k8s.io/autoscaler/cluster-autoscaler/capacitybuffer/testutil"
31+
fakeClient "k8s.io/client-go/kubernetes/fake"
32+
"k8s.io/utils/ptr"
3133
)
3234

3335
func TestResourceLimitsTranslator(t *testing.T) {
@@ -46,6 +48,7 @@ func TestResourceLimitsTranslator(t *testing.T) {
4648
})
4749
fakeClient := fakeClient.NewSimpleClientset(podTemp4mem100cpu, podTemp8mem200cpu, podTemp4gpu)
4850
fakeCapacityBuffersClient, _ := cbclient.NewCapacityBufferClient(nil, fakeClient, nil, nil, nil, nil, nil, nil, nil, nil, nil)
51+
noResourcesSetMessage := "couldn't calculate number of pods for buffer based on provided resource limits. Check if the pod template requests at least one limited resource"
4952

5053
tests := []struct {
5154
name string
@@ -71,9 +74,9 @@ func TestResourceLimitsTranslator(t *testing.T) {
7174
}),
7275
},
7376
expectedStatus: []*v1.CapacityBufferStatus{
74-
testutil.GetBufferStatus(nil, nil, nil, &testutil.ProvisioningStrategy, testutil.GetConditionNotReady()),
77+
testutil.GetBufferStatus(nil, nil, nil, &testutil.ProvisioningStrategy, nil),
7578
},
76-
expectedNumberOfErrors: 1,
79+
expectedNumberOfErrors: 0,
7780
},
7881
{
7982
name: "Limits exist and no replicas, buffer filtered",
@@ -185,27 +188,56 @@ func TestResourceLimitsTranslator(t *testing.T) {
185188
}),
186189
},
187190
expectedStatus: []*v1.CapacityBufferStatus{
188-
testutil.GetBufferStatus(nil, nil, nil, &testutil.ProvisioningStrategy, testutil.GetConditionNotReady()),
191+
testutil.GetBufferStatus(
192+
nil, nil, nil, &testutil.ProvisioningStrategy, testutil.GetConditionNotReadyWithMessage(noResourcesSetMessage)),
189193
},
190-
expectedNumberOfErrors: 1,
194+
expectedNumberOfErrors: 0,
195+
},
196+
{
197+
name: "conditions are not overridden if buffer is not ready",
198+
buffers: []*v1.CapacityBuffer{
199+
testutil.NewBuffer(
200+
testutil.WithPodTemplateRef(podTemp4mem100cpu.Name),
201+
testutil.WithLimits(v1.ResourceList{
202+
"nvidia.com/gpu": resource.MustParse("100m"),
203+
}),
204+
testutil.WithActiveProvisioningStrategy(),
205+
func(buffer *v1.CapacityBuffer) {
206+
buffer.Status.Conditions = testutil.GetConditionNotReadyWithMessage("test error")
207+
},
208+
),
209+
},
210+
expectedStatus: []*v1.CapacityBufferStatus{
211+
testutil.GetBufferStatus(nil, nil, nil, nil, testutil.GetConditionNotReadyWithMessage("test error")),
212+
},
213+
expectedNumberOfErrors: 0,
191214
},
192215
}
193216
for _, test := range tests {
194217
t.Run(test.name, func(t *testing.T) {
195218
translator := NewResourceLimitsTranslator(fakeCapacityBuffersClient)
196219
errors := translator.Translate(test.buffers)
197220
assert.Equal(t, len(errors), test.expectedNumberOfErrors)
198-
assert.ElementsMatch(t, test.expectedStatus, testutil.SanitizeBuffersStatus(test.buffers))
221+
for i, buffer := range test.buffers {
222+
wantStatus := test.expectedStatus[i]
223+
if diff := cmp.Diff(wantStatus, &buffer.Status, cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime")); diff != "" {
224+
t.Errorf("buffer status mismatch (-want +got):\n%s", diff)
225+
}
226+
}
199227
})
200228
}
201229
}
202230

203231
func getTestBufferWithLimits(podTemplateRef *v1.LocalObjectRef, replicas *int32, limits *v1.ResourceList) *v1.CapacityBuffer {
204-
return testutil.GetBuffer(&testutil.ProvisioningStrategy, nil, nil, podTemplateRef, replicas, nil, nil, limits)
232+
var podTemplateGeneration *int64
233+
if podTemplateRef != nil {
234+
podTemplateGeneration = ptr.To[int64](1)
235+
}
236+
return testutil.GetBuffer(&testutil.ProvisioningStrategy, nil, nil, podTemplateRef, replicas, podTemplateGeneration, nil, limits)
205237
}
206238

207239
func getTestBufferStatusWithReplicas(podTemplateRef *v1.LocalObjectRef, replicas int32) *v1.CapacityBufferStatus {
208-
return testutil.GetBufferStatus(podTemplateRef, &replicas, pointerToInt64(1), &testutil.ProvisioningStrategy, testutil.GetConditionReady())
240+
return testutil.GetBufferStatus(podTemplateRef, &replicas, ptr.To[int64](1), &testutil.ProvisioningStrategy, testutil.GetConditionReadyWithMessage("ready"))
209241
}
210242

211243
func getPodTemplateWithResources(name string, resources corev1.ResourceList) *corev1.PodTemplate {

0 commit comments

Comments
 (0)