Skip to content

Commit 5711200

Browse files
committed
feat: support custom metrics (#2480)
Signed-off-by: ashing <[email protected]>
1 parent 007fe7e commit 5711200

File tree

10 files changed

+675
-488
lines changed

10 files changed

+675
-488
lines changed

go.mod

Lines changed: 130 additions & 102 deletions
Large diffs are not rendered by default.

go.sum

Lines changed: 318 additions & 372 deletions
Large diffs are not rendered by default.

internal/controller/status/updater.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ import (
2626
"k8s.io/apimachinery/pkg/types"
2727
"k8s.io/client-go/util/retry"
2828
"sigs.k8s.io/controller-runtime/pkg/client"
29+
30+
pkgmetrics "github.com/apache/apisix-ingress-controller/pkg/metrics"
2931
)
3032

3133
const UpdateChannelBufferSize = 1000
@@ -110,6 +112,8 @@ func (u *UpdateHandler) Start(ctx context.Context) error {
110112
case <-ctx.Done():
111113
return nil
112114
case update := <-u.updateChannel:
115+
// Decrement queue length after removing item from queue
116+
pkgmetrics.DecStatusQueueLength()
113117
u.log.Info("received a status update", "namespace", update.NamespacedName.Namespace,
114118
"name", update.NamespacedName.Name)
115119

@@ -137,4 +141,6 @@ type UpdateWriter struct {
137141
// Update hands a status update to the handler through u.updateChannel.
//
// It first waits on u.wg and then sends the update; the send blocks until the
// channel can accept the value. The queue length gauge therefore counts
// updates that are either buffered or waiting to be accepted.
func (u *UpdateWriter) Update(update Update) {
	u.wg.Wait()
	// Count the item before handing it over. The consumer decrements the gauge
	// right after it receives from the channel, so incrementing after the send
	// would let the decrement run first and briefly report a negative length.
	pkgmetrics.IncStatusQueueLength()
	u.updateChannel <- update
}

internal/manager/run.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ import (
4242
"github.com/apache/apisix-ingress-controller/internal/controller/config"
4343
"github.com/apache/apisix-ingress-controller/internal/controller/status"
4444
"github.com/apache/apisix-ingress-controller/internal/provider/adc"
45+
_ "github.com/apache/apisix-ingress-controller/pkg/metrics"
4546
)
4647

4748
var (

internal/provider/adc/adc.go

Lines changed: 55 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ import (
4343
"github.com/apache/apisix-ingress-controller/internal/provider/adc/translator"
4444
"github.com/apache/apisix-ingress-controller/internal/types"
4545
"github.com/apache/apisix-ingress-controller/internal/utils"
46+
pkgmetrics "github.com/apache/apisix-ingress-controller/pkg/metrics"
4647
)
4748

4849
type adcConfig struct {
@@ -389,6 +390,8 @@ func (d *adcClient) sync(ctx context.Context, task Task) error {
389390
return nil
390391
}
391392

393+
var errs types.ADCExecutionErrors
394+
392395
// for global rules, we need to list all global rules and set it to the task resources
393396
if slices.Contains(task.ResourceTypes, "global_rule") {
394397
for _, config := range task.configs {
@@ -410,42 +413,90 @@ func (d *adcClient) sync(ctx context.Context, task Task) error {
410413
task.Resources.GlobalRules = globalrule
411414
log.Debugw("syncing resources global rules", zap.Any("globalRules", task.Resources.GlobalRules))
412415

416+
fileIOStart := time.Now()
413417
syncFilePath, cleanup, err := prepareSyncFile(task.Resources)
414418
if err != nil {
419+
pkgmetrics.RecordFileIODuration("prepare_sync_file", "failure", time.Since(fileIOStart).Seconds())
415420
return err
416421
}
422+
pkgmetrics.RecordFileIODuration("prepare_sync_file", "success", time.Since(fileIOStart).Seconds())
417423
defer cleanup()
418424

419425
args := BuildADCExecuteArgs(syncFilePath, task.Labels, task.ResourceTypes)
420426

421-
if err := d.executor.Execute(ctx, d.BackendMode, config, args); err != nil {
427+
// Record sync duration for each config
428+
startTime := time.Now()
429+
resourceType := strings.Join(task.ResourceTypes, ",")
430+
if resourceType == "" {
431+
resourceType = "all"
432+
}
433+
434+
err = d.executor.Execute(ctx, d.BackendMode, config, args)
435+
duration := time.Since(startTime).Seconds()
436+
437+
status := "success"
438+
if err != nil {
439+
status = "failure"
422440
log.Errorw("failed to execute adc command", zap.Error(err), zap.Any("config", config))
423-
return err
441+
442+
var execErr types.ADCExecutionError
443+
if errors.As(err, &execErr) {
444+
errs.Errors = append(errs.Errors, execErr)
445+
pkgmetrics.RecordExecutionError(config.Name, execErr.Name)
446+
} else {
447+
pkgmetrics.RecordExecutionError(config.Name, "unknown")
448+
}
424449
}
450+
451+
// Record metrics
452+
pkgmetrics.RecordSyncDuration(config.Name, resourceType, status, duration)
425453
}
426454

427455
return nil
428456
}
429457

458+
// Record file I/O duration
459+
fileIOStart := time.Now()
430460
// every task resources is the same, so we can use the first config to prepare the sync file
431461
syncFilePath, cleanup, err := prepareSyncFile(task.Resources)
432462
if err != nil {
463+
pkgmetrics.RecordFileIODuration("prepare_sync_file", "failure", time.Since(fileIOStart).Seconds())
433464
return err
434465
}
466+
pkgmetrics.RecordFileIODuration("prepare_sync_file", "success", time.Since(fileIOStart).Seconds())
435467
defer cleanup()
436468

437469
args := BuildADCExecuteArgs(syncFilePath, task.Labels, task.ResourceTypes)
438470

439-
var errs types.ADCExecutionErrors
440471
for _, config := range task.configs {
441-
if err := d.executor.Execute(ctx, d.BackendMode, config, args); err != nil {
472+
// Record sync duration for each config
473+
startTime := time.Now()
474+
resourceType := strings.Join(task.ResourceTypes, ",")
475+
if resourceType == "" {
476+
resourceType = "all"
477+
}
478+
479+
err := d.executor.Execute(ctx, d.BackendMode, config, args)
480+
duration := time.Since(startTime).Seconds()
481+
482+
status := "success"
483+
if err != nil {
484+
status = "failure"
442485
log.Errorw("failed to execute adc command", zap.Error(err), zap.Any("config", config))
486+
443487
var execErr types.ADCExecutionError
444488
if errors.As(err, &execErr) {
445489
errs.Errors = append(errs.Errors, execErr)
490+
pkgmetrics.RecordExecutionError(config.Name, execErr.Name)
491+
} else {
492+
pkgmetrics.RecordExecutionError(config.Name, "unknown")
446493
}
447494
}
495+
496+
// Record metrics
497+
pkgmetrics.RecordSyncDuration(config.Name, resourceType, status, duration)
448498
}
499+
449500
if len(errs.Errors) > 0 {
450501
return errs
451502
}

pkg/metrics/metrics.go

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
package metrics
19+
20+
import (
21+
"github.com/prometheus/client_golang/prometheus"
22+
"sigs.k8s.io/controller-runtime/pkg/metrics"
23+
)
24+
25+
var (
26+
// ADCSyncDuration is a histogram of ADC sync durations in seconds, labeled by config_name, resource_type and status.
27+
ADCSyncDuration = prometheus.NewHistogramVec(
28+
prometheus.HistogramOpts{
29+
Name: "apisix_ingress_adc_sync_duration_seconds",
30+
Help: "Time spent on ADC sync operations",
31+
Buckets: prometheus.DefBuckets,
32+
},
33+
[]string{"config_name", "resource_type", "status"},
34+
)
35+
36+
// ADCSyncTotal counts ADC sync operations, labeled by config_name, resource_type and status.
37+
ADCSyncTotal = prometheus.NewCounterVec(
38+
prometheus.CounterOpts{
39+
Name: "apisix_ingress_adc_sync_total",
40+
Help: "Total number of ADC sync operations",
41+
},
42+
[]string{"config_name", "resource_type", "status"},
43+
)
44+
45+
// ADCExecutionErrors counts ADC execution errors, labeled by config_name and error_type.
46+
ADCExecutionErrors = prometheus.NewCounterVec(
47+
prometheus.CounterOpts{
48+
Name: "apisix_ingress_adc_execution_errors_total",
49+
Help: "Total number of ADC execution errors",
50+
},
51+
[]string{"config_name", "error_type"},
52+
)
53+
54+
// StatusUpdateQueueLength is an unlabeled gauge tracking the current length of the status update queue.
55+
StatusUpdateQueueLength = prometheus.NewGauge(
56+
prometheus.GaugeOpts{
57+
Name: "apisix_ingress_status_update_queue_length",
58+
Help: "Current length of the status update queue",
59+
},
60+
)
61+
62+
// FileIODuration is a histogram of file I/O durations in seconds, labeled by operation and status.
63+
FileIODuration = prometheus.NewHistogramVec(
64+
prometheus.HistogramOpts{
65+
Name: "apisix_ingress_file_io_duration_seconds",
66+
Help: "Time spent on file I/O operations",
67+
Buckets: prometheus.DefBuckets,
68+
},
69+
[]string{"operation", "status"},
70+
)
71+
)
72+
73+
// init registers all metrics with the global prometheus registry
74+
func init() {
75+
// Register metrics with controller-runtime's metrics registry
76+
metrics.Registry.MustRegister(
77+
ADCSyncDuration,
78+
ADCSyncTotal,
79+
ADCExecutionErrors,
80+
StatusUpdateQueueLength,
81+
FileIODuration,
82+
)
83+
}
84+
85+
// RecordSyncDuration records the duration of an ADC sync operation
86+
func RecordSyncDuration(configName, resourceType, status string, duration float64) {
87+
ADCSyncDuration.WithLabelValues(configName, resourceType, status).Observe(duration)
88+
ADCSyncTotal.WithLabelValues(configName, resourceType, status).Inc()
89+
}
90+
91+
// RecordExecutionError records an ADC execution error
92+
func RecordExecutionError(configName, errorType string) {
93+
ADCExecutionErrors.WithLabelValues(configName, errorType).Inc()
94+
}
95+
96+
// UpdateStatusQueueLength sets the status update queue length gauge to length,
// replacing whatever value the gauge currently holds.
// NOTE(review): the visible callers only use the Inc/Dec helpers; confirm that
// mixing an absolute Set with them is intended before using this.
97+
func UpdateStatusQueueLength(length float64) {
98+
StatusUpdateQueueLength.Set(length)
99+
}
100+
101+
// IncStatusQueueLength increments the status update queue length gauge by 1.
// It is the counterpart of DecStatusQueueLength and is called when an update
// is handed to the status update channel.
102+
func IncStatusQueueLength() {
103+
StatusUpdateQueueLength.Inc()
104+
}
105+
106+
// DecStatusQueueLength decrements the status update queue length gauge by 1.
// It is the counterpart of IncStatusQueueLength and is called when an update
// is received from the status update channel.
107+
func DecStatusQueueLength() {
108+
StatusUpdateQueueLength.Dec()
109+
}
110+
111+
// RecordFileIODuration records the duration of a file I/O operation
112+
func RecordFileIODuration(operation, status string, duration float64) {
113+
FileIODuration.WithLabelValues(operation, status).Observe(duration)
114+
}

test/e2e/apisix/route.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ package apisix
2020
import (
2121
"context"
2222
"fmt"
23+
"io"
2324
"net"
2425
"net/http"
2526
"time"
@@ -96,6 +97,37 @@ spec:
9697
err := s.DeleteResource("ApisixRoute", "default")
9798
Expect(err).ShouldNot(HaveOccurred(), "deleting ApisixRoute")
9899
Eventually(request).WithArguments("/headers").WithTimeout(8 * time.Second).ProbeEvery(time.Second).Should(Equal(http.StatusNotFound))
100+
101+
By("request /metrics endpoint from controller")
102+
103+
// Get the metrics service endpoint
104+
metricsURL := s.GetMetricsEndpoint()
105+
106+
By("verify metrics content")
107+
resp, err := http.Get(metricsURL)
108+
Expect(err).ShouldNot(HaveOccurred(), "request metrics endpoint")
109+
defer func() {
110+
_ = resp.Body.Close()
111+
}()
112+
113+
Expect(resp.StatusCode).Should(Equal(http.StatusOK))
114+
115+
body, err := io.ReadAll(resp.Body)
116+
Expect(err).ShouldNot(HaveOccurred(), "read metrics response")
117+
118+
bodyStr := string(body)
119+
120+
// Verify prometheus format
121+
Expect(resp.Header.Get("Content-Type")).Should(ContainSubstring("text/plain; version=0.0.4; charset=utf-8"))
122+
123+
// Verify specific metrics from metrics.go exist
124+
Expect(bodyStr).Should(ContainSubstring("apisix_ingress_adc_sync_duration_seconds"))
125+
Expect(bodyStr).Should(ContainSubstring("apisix_ingress_adc_sync_total"))
126+
Expect(bodyStr).Should(ContainSubstring("apisix_ingress_status_update_queue_length"))
127+
Expect(bodyStr).Should(ContainSubstring("apisix_ingress_file_io_duration_seconds"))
128+
129+
// Log metrics for debugging
130+
fmt.Printf("Metrics endpoint response:\n%s\n", bodyStr)
99131
})
100132

101133
It("Test plugins in ApisixRoute", func() {

test/e2e/framework/manifests/apisix.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,9 @@ spec:
9090
- name: admin
9191
containerPort: 9180
9292
protocol: TCP
93+
- name: control
94+
containerPort: 9090
95+
protocol: TCP
9396
volumeMounts:
9497
- name: config-writable
9598
mountPath: /usr/local/apisix/conf

test/e2e/framework/manifests/ingress.yaml

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -346,10 +346,10 @@ metadata:
346346
namespace: {{ .Namespace }}
347347
spec:
348348
ports:
349-
- name: https
350-
port: 8443
349+
- name: metrics
350+
port: 8080
351351
protocol: TCP
352-
targetPort: 8443
352+
targetPort: 8080
353353
selector:
354354
control-plane: controller-manager
355355
---
@@ -399,19 +399,16 @@ spec:
399399
initialDelaySeconds: 15
400400
periodSeconds: 20
401401
name: manager
402+
ports:
403+
- name: metrics
404+
containerPort: 8080
405+
protocol: TCP
402406
readinessProbe:
403407
httpGet:
404408
path: /readyz
405409
port: 8081
406410
initialDelaySeconds: 5
407411
periodSeconds: 10
408-
resources:
409-
limits:
410-
cpu: 500m
411-
memory: 128Mi
412-
requests:
413-
cpu: 10m
414-
memory: 64Mi
415412
securityContext:
416413
allowPrivilegeEscalation: false
417414
capabilities:

test/e2e/scaffold/scaffold.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -444,3 +444,12 @@ func NewClient(scheme, host string) *httpexpect.Expect {
444444
),
445445
})
446446
}
447+
448+
func (s *Scaffold) GetMetricsEndpoint() string {
449+
tunnel := k8s.NewTunnel(s.kubectlOptions, k8s.ResourceTypeService, "apisix-ingress-controller-manager-metrics-service", 8080, 8080)
450+
if err := tunnel.ForwardPortE(s.t); err != nil {
451+
return ""
452+
}
453+
s.addFinalizers(tunnel.Close)
454+
return fmt.Sprintf("http://%s/metrics", tunnel.Endpoint())
455+
}

0 commit comments

Comments
 (0)