Skip to content

Commit 3460573

Browse files
authored
Merge pull request kubernetes#81399 from roycaihw/webhook-rejection-metrics
Fix the rejected label semantics in webhook metrics, add a counter metrics for webhook rejection with details
2 parents a9f0db1 + 620f5f2 commit 3460573

File tree

6 files changed

+159
-11
lines changed

6 files changed

+159
-11
lines changed

staging/src/k8s.io/apiserver/pkg/admission/metrics/metrics.go

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,21 @@ import (
2727
"k8s.io/component-base/metrics/legacyregistry"
2828
)
2929

30+
// WebhookRejectionErrorType defines different error types that happen in a webhook rejection.
31+
type WebhookRejectionErrorType string
32+
3033
const (
3134
namespace = "apiserver"
3235
subsystem = "admission"
36+
37+
// WebhookRejectionCallingWebhookError identifies a calling webhook error which causes
38+
// a webhook admission to reject a request
39+
WebhookRejectionCallingWebhookError WebhookRejectionErrorType = "calling_webhook_error"
40+
// WebhookRejectionAPIServerInternalError identifies an apiserver internal error which
41+
// causes a webhook admission to reject a request
42+
WebhookRejectionAPIServerInternalError WebhookRejectionErrorType = "apiserver_internal_error"
43+
// WebhookRejectionNoError identifies a webhook properly rejected a request
44+
WebhookRejectionNoError WebhookRejectionErrorType = "no_error"
3345
)
3446

3547
var (
@@ -103,9 +115,10 @@ func (p pluginHandlerWithMetrics) Validate(ctx context.Context, a admission.Attr
103115

104116
// AdmissionMetrics instruments admission with prometheus metrics.
105117
type AdmissionMetrics struct {
106-
step *metricSet
107-
controller *metricSet
108-
webhook *metricSet
118+
step *metricSet
119+
controller *metricSet
120+
webhook *metricSet
121+
webhookRejection *metrics.CounterVec
109122
}
110123

111124
// newAdmissionMetrics create a new AdmissionMetrics, configured with default metric names.
@@ -126,10 +139,21 @@ func newAdmissionMetrics() *AdmissionMetrics {
126139
[]string{"name", "type", "operation", "rejected"},
127140
"Admission webhook %s, identified by name and broken out for each operation and API resource and type (validate or admit).", false)
128141

142+
webhookRejection := metrics.NewCounterVec(
143+
&metrics.CounterOpts{
144+
Namespace: namespace,
145+
Subsystem: subsystem,
146+
Name: "webhook_rejection_count",
147+
Help: "Admission webhook rejection count, identified by name and broken out for each admission type (validating or admit) and operation. Additional labels specify an error type (calling_webhook_error or apiserver_internal_error if an error occurred; no_error otherwise) and optionally a non-zero rejection code if the webhook rejects the request with an HTTP status code (honored by the apiserver when the code is greater or equal to 400). Codes greater than 600 are truncated to 600, to keep the metrics cardinality bounded.",
148+
StabilityLevel: metrics.ALPHA,
149+
},
150+
[]string{"name", "type", "operation", "error_type", "rejection_code"})
151+
129152
step.mustRegister()
130153
controller.mustRegister()
131154
webhook.mustRegister()
132-
return &AdmissionMetrics{step: step, controller: controller, webhook: webhook}
155+
legacyregistry.MustRegister(webhookRejection)
156+
return &AdmissionMetrics{step: step, controller: controller, webhook: webhook, webhookRejection: webhookRejection}
133157
}
134158

135159
func (m *AdmissionMetrics) reset() {
@@ -153,6 +177,16 @@ func (m *AdmissionMetrics) ObserveWebhook(elapsed time.Duration, rejected bool,
153177
m.webhook.observe(elapsed, append(extraLabels, stepType, string(attr.GetOperation()), strconv.FormatBool(rejected))...)
154178
}
155179

180+
// ObserveWebhookRejection records admission related metrics for an admission webhook rejection.
181+
func (m *AdmissionMetrics) ObserveWebhookRejection(name, stepType, operation string, errorType WebhookRejectionErrorType, rejectionCode int) {
182+
// We truncate codes greater than 600 to keep the cardinality bounded.
183+
// This should be rarely done by a malfunctioning webhook server.
184+
if rejectionCode > 600 {
185+
rejectionCode = 600
186+
}
187+
m.webhookRejection.WithLabelValues(name, stepType, operation, string(errorType), strconv.Itoa(rejectionCode)).Inc()
188+
}
189+
156190
type metricSet struct {
157191
latencies *metrics.HistogramVec
158192
latenciesSummary *metrics.SummaryVec

staging/src/k8s.io/apiserver/pkg/admission/metrics/metrics_test.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,37 @@ func TestObserveWebhook(t *testing.T) {
8181
expectHistogramCountTotal(t, "apiserver_admission_webhook_admission_duration_seconds", wantLabels, 1)
8282
}
8383

84+
func TestObserveWebhookRejection(t *testing.T) {
85+
Metrics.reset()
86+
Metrics.ObserveWebhookRejection("x", stepAdmit, string(admission.Create), WebhookRejectionNoError, 500)
87+
Metrics.ObserveWebhookRejection("x", stepAdmit, string(admission.Create), WebhookRejectionNoError, 654)
88+
Metrics.ObserveWebhookRejection("x", stepValidate, string(admission.Update), WebhookRejectionCallingWebhookError, 0)
89+
wantLabels := map[string]string{
90+
"name": "x",
91+
"operation": string(admission.Create),
92+
"type": "admit",
93+
"error_type": "no_error",
94+
"rejection_code": "500",
95+
}
96+
wantLabels600 := map[string]string{
97+
"name": "x",
98+
"operation": string(admission.Create),
99+
"type": "admit",
100+
"error_type": "no_error",
101+
"rejection_code": "600",
102+
}
103+
wantLabelsCallingWebhookError := map[string]string{
104+
"name": "x",
105+
"operation": string(admission.Update),
106+
"type": "validate",
107+
"error_type": "calling_webhook_error",
108+
"rejection_code": "0",
109+
}
110+
expectCounterValue(t, "apiserver_admission_webhook_rejection_count", wantLabels, 1)
111+
expectCounterValue(t, "apiserver_admission_webhook_rejection_count", wantLabels600, 1)
112+
expectCounterValue(t, "apiserver_admission_webhook_rejection_count", wantLabelsCallingWebhookError, 1)
113+
}
114+
84115
func TestWithMetrics(t *testing.T) {
85116
Metrics.reset()
86117

staging/src/k8s.io/apiserver/pkg/admission/metrics/testutil_test.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,3 +89,35 @@ func expectHistogramCountTotal(t *testing.T, name string, labelFilter map[string
8989
}
9090
}
9191
}
92+
93+
// expectCounterValue ensures that the counts of metrics matching the labelFilter is as
94+
// expected.
95+
func expectCounterValue(t *testing.T, name string, labelFilter map[string]string, wantCount int) {
96+
metrics, err := prometheus.DefaultGatherer.Gather()
97+
if err != nil {
98+
t.Fatalf("Failed to gather metrics: %s", err)
99+
}
100+
101+
counterSum := 0
102+
for _, mf := range metrics {
103+
if mf.GetName() != name {
104+
continue // Ignore other metrics.
105+
}
106+
for _, metric := range mf.GetMetric() {
107+
if !labelsMatch(metric, labelFilter) {
108+
continue
109+
}
110+
counterSum += int(metric.GetCounter().GetValue())
111+
}
112+
}
113+
if wantCount != counterSum {
114+
t.Errorf("Wanted count %d, got %d for metric %s with labels %#+v", wantCount, counterSum, name, labelFilter)
115+
for _, mf := range metrics {
116+
if mf.GetName() == name {
117+
for _, metric := range mf.GetMetric() {
118+
t.Logf("\tnear match: %s", metric.String())
119+
}
120+
}
121+
}
122+
}
123+
}

staging/src/k8s.io/apiserver/pkg/admission/plugin/webhook/mutating/dispatcher.go

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,24 @@ func (a *mutatingDispatcher) Dispatch(ctx context.Context, attr admission.Attrib
133133
round = 1
134134
}
135135
changed, err := a.callAttrMutatingHook(ctx, hook, invocation, versionedAttr, o, round, i)
136-
admissionmetrics.Metrics.ObserveWebhook(time.Since(t), err != nil, versionedAttr.Attributes, "admit", hook.Name)
136+
ignoreClientCallFailures := hook.FailurePolicy != nil && *hook.FailurePolicy == v1beta1.Ignore
137+
rejected := false
138+
if err != nil {
139+
switch err := err.(type) {
140+
case *webhookutil.ErrCallingWebhook:
141+
if !ignoreClientCallFailures {
142+
rejected = true
143+
admissionmetrics.Metrics.ObserveWebhookRejection(hook.Name, "admit", string(versionedAttr.Attributes.GetOperation()), admissionmetrics.WebhookRejectionCallingWebhookError, 0)
144+
}
145+
case *webhookutil.ErrWebhookRejection:
146+
rejected = true
147+
admissionmetrics.Metrics.ObserveWebhookRejection(hook.Name, "admit", string(versionedAttr.Attributes.GetOperation()), admissionmetrics.WebhookRejectionNoError, int(err.Status.ErrStatus.Code))
148+
default:
149+
rejected = true
150+
admissionmetrics.Metrics.ObserveWebhookRejection(hook.Name, "admit", string(versionedAttr.Attributes.GetOperation()), admissionmetrics.WebhookRejectionAPIServerInternalError, 0)
151+
}
152+
}
153+
admissionmetrics.Metrics.ObserveWebhook(time.Since(t), rejected, versionedAttr.Attributes, "admit", hook.Name)
137154
if changed {
138155
// Patch had changed the object. Prepare to reinvoke all previous webhooks that are eligible for re-invocation.
139156
webhookReinvokeCtx.RequireReinvokingPreviouslyInvokedPlugins()
@@ -146,7 +163,6 @@ func (a *mutatingDispatcher) Dispatch(ctx context.Context, attr admission.Attrib
146163
continue
147164
}
148165

149-
ignoreClientCallFailures := hook.FailurePolicy != nil && *hook.FailurePolicy == v1beta1.Ignore
150166
if callErr, ok := err.(*webhookutil.ErrCallingWebhook); ok {
151167
if ignoreClientCallFailures {
152168
klog.Warningf("Failed calling webhook, failing open %v: %v", hook.Name, callErr)
@@ -164,6 +180,9 @@ func (a *mutatingDispatcher) Dispatch(ctx context.Context, attr admission.Attrib
164180
klog.Warningf("Failed calling webhook, failing closed %v: %v", hook.Name, err)
165181
return apierrors.NewInternalError(err)
166182
}
183+
if rejectionErr, ok := err.(*webhookutil.ErrWebhookRejection); ok {
184+
return rejectionErr.Status
185+
}
167186
return err
168187
}
169188

@@ -249,7 +268,7 @@ func (a *mutatingDispatcher) callAttrMutatingHook(ctx context.Context, h *v1beta
249268
}
250269

251270
if !result.Allowed {
252-
return false, webhookerrors.ToStatusErr(h.Name, result.Result)
271+
return false, &webhookutil.ErrWebhookRejection{Status: webhookerrors.ToStatusErr(h.Name, result.Result)}
253272
}
254273

255274
if len(result.Patch) == 0 {

staging/src/k8s.io/apiserver/pkg/admission/plugin/webhook/validating/dispatcher.go

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,12 +101,28 @@ func (d *validatingDispatcher) Dispatch(ctx context.Context, attr admission.Attr
101101
versionedAttr := versionedAttrs[invocation.Kind]
102102
t := time.Now()
103103
err := d.callHook(ctx, hook, invocation, versionedAttr)
104-
admissionmetrics.Metrics.ObserveWebhook(time.Since(t), err != nil, versionedAttr.Attributes, "validating", hook.Name)
104+
ignoreClientCallFailures := hook.FailurePolicy != nil && *hook.FailurePolicy == v1beta1.Ignore
105+
rejected := false
106+
if err != nil {
107+
switch err := err.(type) {
108+
case *webhookutil.ErrCallingWebhook:
109+
if !ignoreClientCallFailures {
110+
rejected = true
111+
admissionmetrics.Metrics.ObserveWebhookRejection(hook.Name, "validating", string(versionedAttr.Attributes.GetOperation()), admissionmetrics.WebhookRejectionCallingWebhookError, 0)
112+
}
113+
case *webhookutil.ErrWebhookRejection:
114+
rejected = true
115+
admissionmetrics.Metrics.ObserveWebhookRejection(hook.Name, "validating", string(versionedAttr.Attributes.GetOperation()), admissionmetrics.WebhookRejectionNoError, int(err.Status.ErrStatus.Code))
116+
default:
117+
rejected = true
118+
admissionmetrics.Metrics.ObserveWebhookRejection(hook.Name, "validating", string(versionedAttr.Attributes.GetOperation()), admissionmetrics.WebhookRejectionAPIServerInternalError, 0)
119+
}
120+
}
121+
admissionmetrics.Metrics.ObserveWebhook(time.Since(t), rejected, versionedAttr.Attributes, "validating", hook.Name)
105122
if err == nil {
106123
return
107124
}
108125

109-
ignoreClientCallFailures := hook.FailurePolicy != nil && *hook.FailurePolicy == v1beta1.Ignore
110126
if callErr, ok := err.(*webhookutil.ErrCallingWebhook); ok {
111127
if ignoreClientCallFailures {
112128
klog.Warningf("Failed calling webhook, failing open %v: %v", hook.Name, callErr)
@@ -119,6 +135,9 @@ func (d *validatingDispatcher) Dispatch(ctx context.Context, attr admission.Attr
119135
return
120136
}
121137

138+
if rejectionErr, ok := err.(*webhookutil.ErrWebhookRejection); ok {
139+
err = rejectionErr.Status
140+
}
122141
klog.Warningf("rejected by webhook %q: %#v", hook.Name, err)
123142
errCh <- err
124143
}(relevantHooks[i])
@@ -211,5 +230,5 @@ func (d *validatingDispatcher) callHook(ctx context.Context, h *v1beta1.Validati
211230
if result.Allowed {
212231
return nil
213232
}
214-
return webhookerrors.ToStatusErr(h.Name, result.Result)
233+
return &webhookutil.ErrWebhookRejection{Status: webhookerrors.ToStatusErr(h.Name, result.Result)}
215234
}

staging/src/k8s.io/apiserver/pkg/util/webhook/error.go

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,11 @@ limitations under the License.
1616

1717
package webhook
1818

19-
import "fmt"
19+
import (
20+
"fmt"
21+
22+
apierrors "k8s.io/apimachinery/pkg/api/errors"
23+
)
2024

2125
// ErrCallingWebhook is returned for transport-layer errors calling webhooks. It
2226
// represents a failure to talk to the webhook, not the webhook rejecting a
@@ -32,3 +36,12 @@ func (e *ErrCallingWebhook) Error() string {
3236
}
3337
return fmt.Sprintf("failed calling webhook %q; no further details available", e.WebhookName)
3438
}
39+
40+
// ErrWebhookRejection represents a webhook properly rejecting a request.
41+
type ErrWebhookRejection struct {
42+
Status *apierrors.StatusError
43+
}
44+
45+
func (e *ErrWebhookRejection) Error() string {
46+
return e.Status.Error()
47+
}

0 commit comments

Comments
 (0)