Skip to content

Commit 94fcceb

Browse files
authored
feat: support custom metrics (#2480)
Signed-off-by: ashing <[email protected]>
1 parent 1156414 commit 94fcceb

File tree

9 files changed

+199
-12
lines changed

9 files changed

+199
-12
lines changed

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ require (
1717
github.com/onsi/ginkgo/v2 v2.20.0
1818
github.com/onsi/gomega v1.34.1
1919
github.com/pkg/errors v0.9.1
20+
github.com/prometheus/client_golang v1.19.1
2021
github.com/samber/lo v1.47.0
2122
github.com/spf13/cobra v1.8.1
2223
github.com/stretchr/testify v1.10.0
@@ -148,7 +149,6 @@ require (
148149
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
149150
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
150151
github.com/pquerna/otp v1.4.0 // indirect
151-
github.com/prometheus/client_golang v1.19.1 // indirect
152152
github.com/prometheus/client_model v0.6.1 // indirect
153153
github.com/prometheus/common v0.55.0 // indirect
154154
github.com/prometheus/procfs v0.15.1 // indirect

internal/controller/status/updater.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ import (
2626
"k8s.io/apimachinery/pkg/types"
2727
"k8s.io/client-go/util/retry"
2828
"sigs.k8s.io/controller-runtime/pkg/client"
29+
30+
pkgmetrics "github.com/apache/apisix-ingress-controller/pkg/metrics"
2931
)
3032

3133
const UpdateChannelBufferSize = 1000
@@ -110,6 +112,8 @@ func (u *UpdateHandler) Start(ctx context.Context) error {
110112
case <-ctx.Done():
111113
return nil
112114
case update := <-u.updateChannel:
115+
// Decrement queue length after removing item from queue
116+
pkgmetrics.DecStatusQueueLength()
113117
u.log.Info("received a status update", "namespace", update.NamespacedName.Namespace,
114118
"name", update.NamespacedName.Name)
115119

@@ -137,4 +141,6 @@ type UpdateWriter struct {
137141
func (u *UpdateWriter) Update(update Update) {
138142
u.wg.Wait()
139143
u.updateChannel <- update
144+
// Increment queue length after adding new item
145+
pkgmetrics.IncStatusQueueLength()
140146
}

internal/manager/run.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ import (
4242
"github.com/apache/apisix-ingress-controller/internal/controller/config"
4343
"github.com/apache/apisix-ingress-controller/internal/controller/status"
4444
"github.com/apache/apisix-ingress-controller/internal/provider/adc"
45+
_ "github.com/apache/apisix-ingress-controller/pkg/metrics"
4546
)
4647

4748
var (

internal/provider/adc/adc.go

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ import (
4242
"github.com/apache/apisix-ingress-controller/internal/provider/adc/translator"
4343
"github.com/apache/apisix-ingress-controller/internal/types"
4444
"github.com/apache/apisix-ingress-controller/internal/utils"
45+
pkgmetrics "github.com/apache/apisix-ingress-controller/pkg/metrics"
4546
)
4647

4748
type adcConfig struct {
@@ -380,24 +381,48 @@ func (d *adcClient) sync(ctx context.Context, task Task) error {
380381
return nil
381382
}
382383

384+
// Record file I/O duration
385+
fileIOStart := time.Now()
383386
syncFilePath, cleanup, err := prepareSyncFile(task.Resources)
384387
if err != nil {
388+
pkgmetrics.RecordFileIODuration("prepare_sync_file", "failure", time.Since(fileIOStart).Seconds())
385389
return err
386390
}
391+
pkgmetrics.RecordFileIODuration("prepare_sync_file", "success", time.Since(fileIOStart).Seconds())
387392
defer cleanup()
388393

389394
args := BuildADCExecuteArgs(syncFilePath, task.Labels, task.ResourceTypes)
390395

391396
var errs types.ADCExecutionErrors
392397
for _, config := range task.configs {
393-
if err := d.executor.Execute(ctx, d.BackendMode, config, args); err != nil {
398+
// Record sync duration for each config
399+
startTime := time.Now()
400+
resourceType := strings.Join(task.ResourceTypes, ",")
401+
if resourceType == "" {
402+
resourceType = "all"
403+
}
404+
405+
err := d.executor.Execute(ctx, d.BackendMode, config, args)
406+
duration := time.Since(startTime).Seconds()
407+
408+
status := "success"
409+
if err != nil {
410+
status = "failure"
394411
log.Errorw("failed to execute adc command", zap.Error(err), zap.Any("config", config))
412+
395413
var execErr types.ADCExecutionError
396414
if errors.As(err, &execErr) {
397415
errs.Errors = append(errs.Errors, execErr)
416+
pkgmetrics.RecordExecutionError(config.Name, execErr.Name)
417+
} else {
418+
pkgmetrics.RecordExecutionError(config.Name, "unknown")
398419
}
399420
}
421+
422+
// Record metrics
423+
pkgmetrics.RecordSyncDuration(config.Name, resourceType, status, duration)
400424
}
425+
401426
if len(errs.Errors) > 0 {
402427
return errs
403428
}

pkg/metrics/metrics.go

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
package metrics
19+
20+
import (
21+
"github.com/prometheus/client_golang/prometheus"
22+
"sigs.k8s.io/controller-runtime/pkg/metrics"
23+
)
24+
25+
var (
26+
// ADC sync operation duration histogram
27+
ADCSyncDuration = prometheus.NewHistogramVec(
28+
prometheus.HistogramOpts{
29+
Name: "apisix_ingress_adc_sync_duration_seconds",
30+
Help: "Time spent on ADC sync operations",
31+
Buckets: prometheus.DefBuckets,
32+
},
33+
[]string{"config_name", "resource_type", "status"},
34+
)
35+
36+
// ADC sync operation counter
37+
ADCSyncTotal = prometheus.NewCounterVec(
38+
prometheus.CounterOpts{
39+
Name: "apisix_ingress_adc_sync_total",
40+
Help: "Total number of ADC sync operations",
41+
},
42+
[]string{"config_name", "resource_type", "status"},
43+
)
44+
45+
// ADC execution errors counter
46+
ADCExecutionErrors = prometheus.NewCounterVec(
47+
prometheus.CounterOpts{
48+
Name: "apisix_ingress_adc_execution_errors_total",
49+
Help: "Total number of ADC execution errors",
50+
},
51+
[]string{"config_name", "error_type"},
52+
)
53+
54+
// Status update channel queue length gauge
55+
StatusUpdateQueueLength = prometheus.NewGauge(
56+
prometheus.GaugeOpts{
57+
Name: "apisix_ingress_status_update_queue_length",
58+
Help: "Current length of the status update queue",
59+
},
60+
)
61+
62+
// File I/O operation duration histogram
63+
FileIODuration = prometheus.NewHistogramVec(
64+
prometheus.HistogramOpts{
65+
Name: "apisix_ingress_file_io_duration_seconds",
66+
Help: "Time spent on file I/O operations",
67+
Buckets: prometheus.DefBuckets,
68+
},
69+
[]string{"operation", "status"},
70+
)
71+
)
72+
73+
// init registers all metrics with the global prometheus registry
74+
func init() {
75+
// Register metrics with controller-runtime's metrics registry
76+
metrics.Registry.MustRegister(
77+
ADCSyncDuration,
78+
ADCSyncTotal,
79+
ADCExecutionErrors,
80+
StatusUpdateQueueLength,
81+
FileIODuration,
82+
)
83+
}
84+
85+
// RecordSyncDuration records the duration of an ADC sync operation
86+
func RecordSyncDuration(configName, resourceType, status string, duration float64) {
87+
ADCSyncDuration.WithLabelValues(configName, resourceType, status).Observe(duration)
88+
ADCSyncTotal.WithLabelValues(configName, resourceType, status).Inc()
89+
}
90+
91+
// RecordExecutionError records an ADC execution error
92+
func RecordExecutionError(configName, errorType string) {
93+
ADCExecutionErrors.WithLabelValues(configName, errorType).Inc()
94+
}
95+
96+
// UpdateStatusQueueLength updates the status update queue length gauge
97+
func UpdateStatusQueueLength(length float64) {
98+
StatusUpdateQueueLength.Set(length)
99+
}
100+
101+
// IncStatusQueueLength increments the status update queue length gauge by 1
102+
func IncStatusQueueLength() {
103+
StatusUpdateQueueLength.Inc()
104+
}
105+
106+
// DecStatusQueueLength decrements the status update queue length gauge by 1
107+
func DecStatusQueueLength() {
108+
StatusUpdateQueueLength.Dec()
109+
}
110+
111+
// RecordFileIODuration records the duration of a file I/O operation
112+
func RecordFileIODuration(operation, status string, duration float64) {
113+
FileIODuration.WithLabelValues(operation, status).Observe(duration)
114+
}

test/e2e/apisix/route.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ package apisix
2020
import (
2121
"context"
2222
"fmt"
23+
"io"
2324
"net"
2425
"net/http"
2526
"time"
@@ -96,6 +97,37 @@ spec:
9697
err := s.DeleteResource("ApisixRoute", "default")
9798
Expect(err).ShouldNot(HaveOccurred(), "deleting ApisixRoute")
9899
Eventually(request).WithArguments("/headers").WithTimeout(8 * time.Second).ProbeEvery(time.Second).Should(Equal(http.StatusNotFound))
100+
101+
By("request /metrics endpoint from controller")
102+
103+
// Get the metrics service endpoint
104+
metricsURL := s.GetMetricsEndpoint()
105+
106+
By("verify metrics content")
107+
resp, err := http.Get(metricsURL)
108+
Expect(err).ShouldNot(HaveOccurred(), "request metrics endpoint")
109+
defer func() {
110+
_ = resp.Body.Close()
111+
}()
112+
113+
Expect(resp.StatusCode).Should(Equal(http.StatusOK))
114+
115+
body, err := io.ReadAll(resp.Body)
116+
Expect(err).ShouldNot(HaveOccurred(), "read metrics response")
117+
118+
bodyStr := string(body)
119+
120+
// Verify prometheus format
121+
Expect(resp.Header.Get("Content-Type")).Should(ContainSubstring("text/plain; version=0.0.4; charset=utf-8"))
122+
123+
// Verify specific metrics from metrics.go exist
124+
Expect(bodyStr).Should(ContainSubstring("apisix_ingress_adc_sync_duration_seconds"))
125+
Expect(bodyStr).Should(ContainSubstring("apisix_ingress_adc_sync_total"))
126+
Expect(bodyStr).Should(ContainSubstring("apisix_ingress_status_update_queue_length"))
127+
Expect(bodyStr).Should(ContainSubstring("apisix_ingress_file_io_duration_seconds"))
128+
129+
// Log metrics for debugging
130+
fmt.Printf("Metrics endpoint response:\n%s\n", bodyStr)
99131
})
100132

101133
It("Test plugins in ApisixRoute", func() {

test/e2e/framework/manifests/apisix.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,9 @@ spec:
9090
- name: admin
9191
containerPort: 9180
9292
protocol: TCP
93+
- name: control
94+
containerPort: 9090
95+
protocol: TCP
9396
volumeMounts:
9497
- name: config-writable
9598
mountPath: /usr/local/apisix/conf

test/e2e/framework/manifests/ingress.yaml

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -346,10 +346,10 @@ metadata:
346346
namespace: {{ .Namespace }}
347347
spec:
348348
ports:
349-
- name: https
350-
port: 8443
349+
- name: metrics
350+
port: 8080
351351
protocol: TCP
352-
targetPort: 8443
352+
targetPort: 8080
353353
selector:
354354
control-plane: controller-manager
355355
---
@@ -399,19 +399,16 @@ spec:
399399
initialDelaySeconds: 15
400400
periodSeconds: 20
401401
name: manager
402+
ports:
403+
- name: metrics
404+
containerPort: 8080
405+
protocol: TCP
402406
readinessProbe:
403407
httpGet:
404408
path: /readyz
405409
port: 8081
406410
initialDelaySeconds: 5
407411
periodSeconds: 10
408-
resources:
409-
limits:
410-
cpu: 500m
411-
memory: 128Mi
412-
requests:
413-
cpu: 10m
414-
memory: 64Mi
415412
securityContext:
416413
allowPrivilegeEscalation: false
417414
capabilities:

test/e2e/scaffold/scaffold.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,3 +443,12 @@ func NewClient(scheme, host string) *httpexpect.Expect {
443443
),
444444
})
445445
}
446+
447+
func (s *Scaffold) GetMetricsEndpoint() string {
448+
tunnel := k8s.NewTunnel(s.kubectlOptions, k8s.ResourceTypeService, "apisix-ingress-controller-manager-metrics-service", 8080, 8080)
449+
if err := tunnel.ForwardPortE(s.t); err != nil {
450+
return ""
451+
}
452+
s.addFinalizers(tunnel.Close)
453+
return fmt.Sprintf("http://%s/metrics", tunnel.Endpoint())
454+
}

0 commit comments

Comments
 (0)