Skip to content

Commit 81be2e9

Browse files
Fix: Truncate large error messages in status conditions (#2169)
When upgrading operators, CRD validation errors can be very large (50KB+). Kubernetes rejects status updates over 32KB with "Too long: may not be more than 32768 bytes". This causes ClusterExtension upgrades to fail and get stuck. Assisted-by: Cursor
1 parent 78d8a2a commit 81be2e9

File tree

3 files changed

+208
-11
lines changed

3 files changed

+208
-11
lines changed

internal/operator-controller/controllers/clusterextension_controller.go

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ import (
2222
"fmt"
2323
"io/fs"
2424
"strings"
25-
"time"
2625

2726
"github.com/go-logr/logr"
2827
"helm.sh/helm/v3/pkg/release"
@@ -156,15 +155,13 @@ func ensureAllConditionsWithReason(ext *ocv1.ClusterExtension, reason v1alpha1.C
156155
cond := apimeta.FindStatusCondition(ext.Status.Conditions, condType)
157156
if cond == nil {
158157
// Create a new condition with a valid reason and add it
159-
newCond := metav1.Condition{
158+
SetStatusCondition(&ext.Status.Conditions, metav1.Condition{
160159
Type: condType,
161160
Status: metav1.ConditionFalse,
162161
Reason: string(reason),
163162
Message: message,
164163
ObservedGeneration: ext.GetGeneration(),
165-
LastTransitionTime: metav1.NewTime(time.Now()),
166-
}
167-
ext.Status.Conditions = append(ext.Status.Conditions, newCond)
164+
})
168165
}
169166
}
170167
}
@@ -381,7 +378,7 @@ func SetDeprecationStatus(ext *ocv1.ClusterExtension, bundleName string, depreca
381378
if len(deprecationMessages) > 0 {
382379
status, reason, message = metav1.ConditionTrue, ocv1.ReasonDeprecated, strings.Join(deprecationMessages, ";")
383380
}
384-
apimeta.SetStatusCondition(&ext.Status.Conditions, metav1.Condition{
381+
SetStatusCondition(&ext.Status.Conditions, metav1.Condition{
385382
Type: ocv1.TypeDeprecated,
386383
Reason: reason,
387384
Status: status,
@@ -403,7 +400,7 @@ func SetDeprecationStatus(ext *ocv1.ClusterExtension, bundleName string, depreca
403400
message = fmt.Sprintf("%s\n%s", message, entry.Message)
404401
}
405402
}
406-
apimeta.SetStatusCondition(&ext.Status.Conditions, metav1.Condition{
403+
SetStatusCondition(&ext.Status.Conditions, metav1.Condition{
407404
Type: conditionType,
408405
Reason: reason,
409406
Status: status,

internal/operator-controller/controllers/common_controller.go

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,30 @@ import (
2727
ocv1 "github.com/operator-framework/operator-controller/api/v1"
2828
)
2929

30+
const (
31+
// maxConditionMessageLength set the max message length allowed by Kubernetes.
32+
maxConditionMessageLength = 32768
33+
// truncationSuffix is the suffix added when a message is cut.
34+
truncationSuffix = "\n\n... [message truncated]"
35+
)
36+
37+
// truncateMessage cuts long messages to fit Kubernetes condition limits
38+
func truncateMessage(message string) string {
39+
if len(message) <= maxConditionMessageLength {
40+
return message
41+
}
42+
43+
maxContent := maxConditionMessageLength - len(truncationSuffix)
44+
return message[:maxContent] + truncationSuffix
45+
}
46+
47+
// SetStatusCondition wraps apimeta.SetStatusCondition and ensures the message is always truncated
48+
// This should be used throughout the codebase instead of apimeta.SetStatusCondition directly
49+
func SetStatusCondition(conditions *[]metav1.Condition, condition metav1.Condition) {
50+
condition.Message = truncateMessage(condition.Message)
51+
apimeta.SetStatusCondition(conditions, condition)
52+
}
53+
3054
// setInstalledStatusFromBundle sets the installed status based on the given installedBundle.
3155
func setInstalledStatusFromBundle(ext *ocv1.ClusterExtension, installedBundle *InstalledBundle) {
3256
// Nothing is installed
@@ -45,7 +69,7 @@ func setInstalledStatusFromBundle(ext *ocv1.ClusterExtension, installedBundle *I
4569

4670
// setInstalledStatusConditionSuccess sets the installed status condition to success.
4771
func setInstalledStatusConditionSuccess(ext *ocv1.ClusterExtension, message string) {
48-
apimeta.SetStatusCondition(&ext.Status.Conditions, metav1.Condition{
72+
SetStatusCondition(&ext.Status.Conditions, metav1.Condition{
4973
Type: ocv1.TypeInstalled,
5074
Status: metav1.ConditionTrue,
5175
Reason: ocv1.ReasonSucceeded,
@@ -56,7 +80,7 @@ func setInstalledStatusConditionSuccess(ext *ocv1.ClusterExtension, message stri
5680

5781
// setInstalledStatusConditionFailed sets the installed status condition to failed.
5882
func setInstalledStatusConditionFailed(ext *ocv1.ClusterExtension, message string) {
59-
apimeta.SetStatusCondition(&ext.Status.Conditions, metav1.Condition{
83+
SetStatusCondition(&ext.Status.Conditions, metav1.Condition{
6084
Type: ocv1.TypeInstalled,
6185
Status: metav1.ConditionFalse,
6286
Reason: ocv1.ReasonFailed,
@@ -67,7 +91,7 @@ func setInstalledStatusConditionFailed(ext *ocv1.ClusterExtension, message strin
6791

6892
// setInstalledStatusConditionUnknown sets the installed status condition to unknown.
6993
func setInstalledStatusConditionUnknown(ext *ocv1.ClusterExtension, message string) {
70-
apimeta.SetStatusCondition(&ext.Status.Conditions, metav1.Condition{
94+
SetStatusCondition(&ext.Status.Conditions, metav1.Condition{
7195
Type: ocv1.TypeInstalled,
7296
Status: metav1.ConditionUnknown,
7397
Reason: ocv1.ReasonFailed,
@@ -99,5 +123,5 @@ func setStatusProgressing(ext *ocv1.ClusterExtension, err error) {
99123
progressingCond.Reason = ocv1.ReasonBlocked
100124
}
101125

102-
apimeta.SetStatusCondition(&ext.Status.Conditions, progressingCond)
126+
SetStatusCondition(&ext.Status.Conditions, progressingCond)
103127
}

internal/operator-controller/controllers/common_controller_test.go

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ package controllers
22

33
import (
44
"errors"
5+
"fmt"
6+
"strings"
57
"testing"
68

79
"github.com/google/go-cmp/cmp"
@@ -64,3 +66,177 @@ func TestSetStatusProgressing(t *testing.T) {
6466
})
6567
}
6668
}
69+
70+
func TestTruncateMessage(t *testing.T) {
71+
tests := []struct {
72+
name string
73+
message string
74+
expected string
75+
}{
76+
{
77+
name: "short message unchanged",
78+
message: "This is a short message",
79+
expected: "This is a short message",
80+
},
81+
{
82+
name: "empty message unchanged",
83+
message: "",
84+
expected: "",
85+
},
86+
{
87+
name: "exact max length message unchanged",
88+
message: strings.Repeat("a", maxConditionMessageLength),
89+
expected: strings.Repeat("a", maxConditionMessageLength),
90+
},
91+
{
92+
name: "message just over limit gets truncated",
93+
message: strings.Repeat("a", maxConditionMessageLength+1),
94+
expected: strings.Repeat("a", maxConditionMessageLength-len(truncationSuffix)) + truncationSuffix,
95+
},
96+
{
97+
name: "very long message gets truncated",
98+
message: strings.Repeat("word ", 10000) + "finalword",
99+
expected: strings.Repeat("word ", 10000)[:maxConditionMessageLength-len(truncationSuffix)] + truncationSuffix,
100+
},
101+
}
102+
103+
for _, tc := range tests {
104+
t.Run(tc.name, func(t *testing.T) {
105+
result := truncateMessage(tc.message)
106+
require.Equal(t, tc.expected, result)
107+
108+
// Verify the result is within the limit
109+
require.LessOrEqual(t, len(result), maxConditionMessageLength,
110+
"truncated message should not exceed max length")
111+
112+
// If the original message was over the limit, verify truncation occurred
113+
if len(tc.message) > maxConditionMessageLength {
114+
require.Contains(t, result, truncationSuffix,
115+
"long messages should contain truncation suffix")
116+
require.Less(t, len(result), len(tc.message),
117+
"truncated message should be shorter than original")
118+
}
119+
})
120+
}
121+
}
122+
123+
func TestSetStatusProgressingWithLongMessage(t *testing.T) {
124+
// Simulate a real ClusterExtension CRD upgrade safety check failure with many validation errors
125+
longError := fmt.Sprintf("validating CRD upgrade safety for ClusterExtension 'my-operator': %s",
126+
strings.Repeat("CRD \"myresources.example.com\" v1beta1->v1: field .spec.replicas changed from optional to required, field .spec.config.timeout type changed from string to integer, field .status.conditions[].observedGeneration removed\n", 500))
127+
128+
ext := &ocv1.ClusterExtension{ObjectMeta: metav1.ObjectMeta{Name: "my-operator"}}
129+
err := errors.New(longError)
130+
setStatusProgressing(ext, err)
131+
132+
cond := meta.FindStatusCondition(ext.Status.Conditions, ocv1.TypeProgressing)
133+
require.NotNil(t, cond)
134+
require.LessOrEqual(t, len(cond.Message), maxConditionMessageLength)
135+
require.Contains(t, cond.Message, truncationSuffix)
136+
require.Contains(t, cond.Message, "validating CRD upgrade safety")
137+
}
138+
139+
func TestClusterExtensionDeprecationMessageTruncation(t *testing.T) {
140+
// Test truncation for ClusterExtension deprecation warnings with many deprecated APIs
141+
ext := &ocv1.ClusterExtension{ObjectMeta: metav1.ObjectMeta{Name: "legacy-operator"}}
142+
143+
// Simulate many deprecation warnings that would overflow the message limit
144+
deprecationMessages := []string{}
145+
for i := 0; i < 1000; i++ {
146+
deprecationMessages = append(deprecationMessages, fmt.Sprintf("API version 'v1beta1' of resource 'customresources%d.example.com' is deprecated, use 'v1' instead", i))
147+
}
148+
149+
longDeprecationMsg := strings.Join(deprecationMessages, "; ")
150+
setInstalledStatusConditionUnknown(ext, longDeprecationMsg)
151+
152+
cond := meta.FindStatusCondition(ext.Status.Conditions, ocv1.TypeInstalled)
153+
require.NotNil(t, cond)
154+
require.LessOrEqual(t, len(cond.Message), maxConditionMessageLength)
155+
require.Contains(t, cond.Message, truncationSuffix, "deprecation messages should be truncated when too long")
156+
require.Contains(t, cond.Message, "API version", "should preserve important deprecation context")
157+
}
158+
159+
func TestClusterExtensionInstallationFailureTruncation(t *testing.T) {
160+
// Test truncation for ClusterExtension installation failures with many bundle validation errors
161+
installError := "failed to install ClusterExtension 'argocd-operator': bundle validation errors: " +
162+
strings.Repeat("resource 'deployments/argocd-server' missing required label 'app.kubernetes.io/name', resource 'services/argocd-server-metrics' has invalid port configuration, resource 'configmaps/argocd-cm' contains invalid YAML in data field 'application.yaml'\n", 400)
163+
164+
ext := &ocv1.ClusterExtension{ObjectMeta: metav1.ObjectMeta{Name: "argocd-operator"}}
165+
setInstalledStatusConditionFailed(ext, installError)
166+
167+
cond := meta.FindStatusCondition(ext.Status.Conditions, ocv1.TypeInstalled)
168+
require.NotNil(t, cond)
169+
170+
// Verify message was truncated due to length
171+
require.LessOrEqual(t, len(cond.Message), maxConditionMessageLength)
172+
require.Contains(t, cond.Message, truncationSuffix, "installation failure messages should be truncated when too long")
173+
require.Contains(t, cond.Message, "failed to install ClusterExtension", "should preserve important context")
174+
require.Equal(t, metav1.ConditionFalse, cond.Status)
175+
require.Equal(t, ocv1.ReasonFailed, cond.Reason)
176+
177+
// Verify original message was actually longer than the limit
178+
require.Greater(t, len(installError), maxConditionMessageLength, "test should use a message that exceeds the limit")
179+
}
180+
181+
func TestSetStatusConditionWrapper(t *testing.T) {
182+
tests := []struct {
183+
name string
184+
message string
185+
expectedTruncated bool
186+
}{
187+
{
188+
name: "short message not truncated",
189+
message: "This is a short message",
190+
expectedTruncated: false,
191+
},
192+
{
193+
name: "long message gets truncated",
194+
message: strings.Repeat("This is a very long message. ", 2000),
195+
expectedTruncated: true,
196+
},
197+
{
198+
name: "message at exact limit not truncated",
199+
message: strings.Repeat("a", maxConditionMessageLength),
200+
expectedTruncated: false,
201+
},
202+
{
203+
name: "message over limit gets truncated",
204+
message: strings.Repeat("a", maxConditionMessageLength+1),
205+
expectedTruncated: true,
206+
},
207+
}
208+
209+
for _, tc := range tests {
210+
t.Run(tc.name, func(t *testing.T) {
211+
var conditions []metav1.Condition
212+
213+
// Use our wrapper function
214+
SetStatusCondition(&conditions, metav1.Condition{
215+
Type: "TestCondition",
216+
Status: metav1.ConditionTrue,
217+
Reason: "Testing",
218+
Message: tc.message,
219+
})
220+
221+
require.Len(t, conditions, 1, "should have exactly one condition")
222+
cond := conditions[0]
223+
224+
// Verify message is within limits
225+
require.LessOrEqual(t, len(cond.Message), maxConditionMessageLength,
226+
"condition message should not exceed max length")
227+
228+
// Check if truncation occurred as expected
229+
if tc.expectedTruncated {
230+
require.Contains(t, cond.Message, truncationSuffix,
231+
"long messages should contain truncation suffix")
232+
require.Less(t, len(cond.Message), len(tc.message),
233+
"truncated message should be shorter than original")
234+
} else {
235+
require.Equal(t, tc.message, cond.Message,
236+
"short messages should remain unchanged")
237+
require.NotContains(t, cond.Message, truncationSuffix,
238+
"short messages should not contain truncation suffix")
239+
}
240+
})
241+
}
242+
}

0 commit comments

Comments
 (0)