Skip to content

Commit 870e9a9

Browse files
Merge pull request #29397 from vrutkovs/excessive-applies-debug
OCPBUGS-48126: kubeapiserver auditloganalyzer: show resources updated too often
2 parents 553866f + 974cc92 commit 870e9a9

File tree

3 files changed

+128
-11
lines changed

3 files changed

+128
-11
lines changed

pkg/monitortests/kubeapiserver/auditloganalyzer/handle_excessive_applies.go

Lines changed: 84 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,70 @@
11
package auditloganalyzer
22

33
import (
4+
"fmt"
5+
"sort"
6+
"strings"
7+
"sync"
8+
49
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
510
auditv1 "k8s.io/apiserver/pkg/apis/audit/v1"
611
"k8s.io/apiserver/pkg/authentication/serviceaccount"
7-
"strings"
8-
"sync"
912
)
1013

14+
type userApplyCount struct {
15+
User string
16+
Applies int
17+
}
18+
19+
type numberOfAppliesWithExamples struct {
20+
numberOfApplies int
21+
users map[string]int
22+
firstAuditEventID string
23+
lastAuditEventID string
24+
firstOccurredAt metav1.Time
25+
lastOccurredAt metav1.Time
26+
}
27+
28+
func (n numberOfAppliesWithExamples) toErrorString() string {
29+
appliesPerUser := []userApplyCount{}
30+
for k, v := range n.users {
31+
appliesPerUser = append(appliesPerUser, userApplyCount{k, v})
32+
}
33+
sort.Slice(appliesPerUser, func(i, j int) bool {
34+
return appliesPerUser[i].Applies > appliesPerUser[j].Applies
35+
})
36+
topUsernames := []string{}
37+
maxUsernames := 5
38+
if len(appliesPerUser) < maxUsernames {
39+
maxUsernames = len(appliesPerUser)
40+
}
41+
for i := 0; i < maxUsernames; i++ {
42+
topUsernames = append(topUsernames, appliesPerUser[i].User)
43+
}
44+
45+
return fmt.Sprintf(`
46+
Time: started %s, finished at %s
47+
Top 5 usernames: %s
48+
Audit Log IDs: %s ... %s
49+
`,
50+
n.firstOccurredAt,
51+
n.lastOccurredAt,
52+
strings.Join(topUsernames, ", "),
53+
n.firstAuditEventID,
54+
n.lastAuditEventID,
55+
)
56+
}
57+
1158
type excessiveApplies struct {
1259
lock sync.Mutex
1360
namespacesToUserToNumberOfApplies map[string]map[string]int
61+
resourcesToNumberOfApplies map[string]numberOfAppliesWithExamples
1462
}
1563

1664
func CheckForExcessiveApplies() *excessiveApplies {
1765
return &excessiveApplies{
1866
namespacesToUserToNumberOfApplies: map[string]map[string]int{},
67+
resourcesToNumberOfApplies: map[string]numberOfAppliesWithExamples{},
1968
}
2069
}
2170

@@ -41,6 +90,38 @@ func (s *excessiveApplies) HandleAuditLogEvent(auditEvent *auditv1.Event, beginn
4190
if !ok {
4291
users = map[string]int{}
4392
}
44-
users[auditEvent.User.Username] = users[auditEvent.User.Username] + 1
93+
users[auditEvent.User.Username] += 1
4594
s.namespacesToUserToNumberOfApplies[nsName] = users
95+
96+
obj := auditEvent.ObjectRef
97+
if obj == nil {
98+
return
99+
}
100+
resource := fmt.Sprintf("%s/%s", obj.Resource, obj.Name)
101+
if obj.Namespace != "" {
102+
resource = fmt.Sprintf("%s -n %s", resource, obj.Namespace)
103+
}
104+
if obj.APIGroup != "" {
105+
resource = fmt.Sprintf("%s.%s", obj.APIGroup, resource)
106+
}
107+
objApplies, ok := s.resourcesToNumberOfApplies[resource]
108+
if !ok {
109+
objApplies = numberOfAppliesWithExamples{
110+
numberOfApplies: 0,
111+
firstAuditEventID: string(auditEvent.AuditID),
112+
lastAuditEventID: string(auditEvent.AuditID),
113+
users: map[string]int{},
114+
firstOccurredAt: metav1.Time(auditEvent.RequestReceivedTimestamp),
115+
lastOccurredAt: metav1.Time(auditEvent.RequestReceivedTimestamp),
116+
}
117+
}
118+
objApplies.numberOfApplies += 1
119+
objApplies.lastAuditEventID = string(auditEvent.AuditID)
120+
objApplies.lastOccurredAt = metav1.Time(auditEvent.RequestReceivedTimestamp)
121+
userApplies, ok := objApplies.users[auditEvent.User.Username]
122+
if !ok {
123+
userApplies = 0
124+
}
125+
objApplies.users[auditEvent.User.Username] = userApplies + 1
126+
s.resourcesToNumberOfApplies[resource] = objApplies
46127
}

pkg/monitortests/kubeapiserver/auditloganalyzer/handle_invalid_requests.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
package auditloganalyzer
22

33
import (
4-
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
5-
auditv1 "k8s.io/apiserver/pkg/apis/audit/v1"
6-
"k8s.io/apiserver/pkg/authentication/serviceaccount"
74
"net/http"
85
"strings"
96
"sync"
7+
8+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9+
auditv1 "k8s.io/apiserver/pkg/apis/audit/v1"
10+
"k8s.io/apiserver/pkg/authentication/serviceaccount"
1011
)
1112

1213
type invalidRequests struct {
@@ -76,7 +77,7 @@ func CheckForInvalidMutations() *invalidRequests {
7677

7778
func isApply(auditEvent *auditv1.Event) bool {
7879
// only SSA
79-
if auditEvent.Verb != "patch" {
80+
if auditEvent.Verb != "patch" || auditEvent.Verb == "apply" {
8081
return false
8182
}
8283
// SSA requires a field manager

pkg/monitortests/kubeapiserver/auditloganalyzer/monitortest.go

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ func (w *auditLogAnalyzer) EvaluateTestsFromConstructedIntervals(ctx context.Con
201201
flakes := []string{}
202202
for username, numberOfApplies := range usersToApplies {
203203
if numberOfApplies > 200 {
204+
errorMessage := fmt.Sprintf("user %v had %d applies, check the audit log and operator log to figure out why", username, numberOfApplies)
204205
switch username {
205206
case "system:serviceaccount:openshift-infra:serviceaccount-pull-secrets-controller",
206207
"system:serviceaccount:openshift-network-operator:cluster-network-operator",
@@ -210,9 +211,9 @@ func (w *auditLogAnalyzer) EvaluateTestsFromConstructedIntervals(ctx context.Con
210211

211212
// These usernames are already creating more than 200 applies, so flake instead of fail.
212213
// We really want to find a way to track namespaces created by the payload versus everything else.
213-
flakes = append(flakes, fmt.Sprintf("user %v had %d applies, check the audit log and operator log to figure out why", username, numberOfApplies))
214+
flakes = append(flakes, errorMessage)
214215
default:
215-
failures = append(failures, fmt.Sprintf("user %v had %d applies, check the audit log and operator log to figure out why", username, numberOfApplies))
216+
failures = append(failures, errorMessage)
216217
}
217218
}
218219
}
@@ -234,7 +235,7 @@ func (w *auditLogAnalyzer) EvaluateTestsFromConstructedIntervals(ctx context.Con
234235
&junitapi.JUnitTestCase{
235236
Name: testName,
236237
FailureOutput: &junitapi.FailureOutput{
237-
Message: strings.Join(failures, "\n"),
238+
Message: strings.Join(flakes, "\n"),
238239
Output: "details in audit log",
239240
},
240241
},
@@ -255,6 +256,40 @@ func (w *auditLogAnalyzer) EvaluateTestsFromConstructedIntervals(ctx context.Con
255256

256257
}
257258

259+
testName := `[Jira:"kube-apiserver"] API resources are not updated excessively`
260+
flakes := []string{}
261+
for resource, applies := range w.excessiveApplyChecker.resourcesToNumberOfApplies {
262+
if applies.numberOfApplies < 200 {
263+
continue
264+
}
265+
errorMessage := fmt.Sprintf("resource %s had %d applies, %s", resource, applies.numberOfApplies, applies.toErrorString())
266+
flakes = append(flakes, errorMessage)
267+
}
268+
switch {
269+
case len(flakes) > 1:
270+
ret = append(ret,
271+
&junitapi.JUnitTestCase{
272+
Name: testName,
273+
FailureOutput: &junitapi.FailureOutput{
274+
Message: strings.Join(flakes, "\n"),
275+
Output: "details in audit log",
276+
},
277+
},
278+
)
279+
ret = append(ret,
280+
&junitapi.JUnitTestCase{
281+
Name: testName,
282+
},
283+
)
284+
285+
default:
286+
ret = append(ret,
287+
&junitapi.JUnitTestCase{
288+
Name: testName,
289+
},
290+
)
291+
}
292+
258293
for verb, namespacesToUserToNumberOf422s := range w.invalidRequestsChecker.verbToNamespacesTouserToNumberOf422s {
259294
for _, namespace := range allPlatformNamespaces {
260295
testName := fmt.Sprintf("users in ns/%s must not produce too many invalid %q requests", namespace, verb)
@@ -345,7 +380,7 @@ func (w *auditLogAnalyzer) EvaluateTestsFromConstructedIntervals(ctx context.Con
345380
}
346381
}
347382

348-
testName := "API LBs follow /readyz of kube-apiserver and stop sending requests before server shutdowns for external clients"
383+
testName = "API LBs follow /readyz of kube-apiserver and stop sending requests before server shutdowns for external clients"
349384
switch {
350385
case len(w.requestsDuringShutdownChecker.auditIDs) > 0:
351386
ret = append(ret,

0 commit comments

Comments
 (0)