Skip to content

Commit 045b164

Browse files
authored
Merge pull request #193 from Kaitou786/tk/add-cf-edge-error-by-path
Add opt-in cloudflare_zone_edge_errors_by_path metric
2 parents 3dc871d + efb63c0 commit 045b164

File tree

6 files changed

+206
-0
lines changed

6 files changed

+206
-0
lines changed

README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ The exporter can be configured using env variables or command flags.
6767
| `SCRAPE_INTERVAL` | scrape interval in seconds (will query cloudflare every SCRAPE_INTERVAL seconds), default `60` |
6868
| `METRICS_DENYLIST` | (Optional) cloudflare-exporter metrics to not export, comma delimited list of cloudflare-exporter metrics. If not set, all metrics are exported |
6969
| `ENABLE_PPROF` | (Optional) enable pprof profiling endpoints at `/debug/pprof/`. Accepts `true` or `false`, default `false`. **Warning**: Only enable in development/debugging environments |
70+
| `ENABLE_EDGE_ERRORS_BY_PATH` | (Optional) enable edge errors by path metric. Accepts `true` or `false`, default `false`. See [Edge Errors by Path Metric](#edge-errors-by-path-metric-opt-in) |
7071
| `ZONE_<NAME>` | `DEPRECATED since 0.0.5` (optional) Zone ID. Add zones you want to scrape by adding env vars in this format. You can find the zone ids in Cloudflare dashboards. |
7172
| `LOG_LEVEL` | Set loglevel. Options are error, warn, info, debug. default `error` |
7273

@@ -86,6 +87,7 @@ Corresponding flags:
8687
-scrape_interval=60: scrape interval in seconds, defaults to 60
8788
-metrics_denylist="": cloudflare-exporter metrics to not export, comma delimited list
8889
-enable_pprof=false: enable pprof profiling endpoints at /debug/pprof/
90+
-enable_edge_errors_by_path=false: enable edge errors by path metric (high cardinality, opt-in)
8991
-log_level="error": log level(error,warn,info,debug)
9092
```
9193

@@ -119,6 +121,7 @@ Note: `ZONE_<name>` configuration is not supported as flag.
119121
# HELP cloudflare_zone_requests_status_country_host Count of requests for zone per edge HTTP status per country per host
120122
# HELP cloudflare_zone_requests_browser_map_page_views_count Number of successful requests for HTML pages per zone
121123
# HELP cloudflare_zone_requests_total Number of requests for zone
124+
# HELP cloudflare_zone_edge_errors_by_path Number of edge errors (4xx and 5xx) by request path
122125
# HELP cloudflare_zone_threats_country Threats per zone per country
123126
# HELP cloudflare_zone_threats_total Threats per zone
124127
# HELP cloudflare_zone_uniques_total Uniques per zone
@@ -131,6 +134,14 @@ Note: `ZONE_<name>` configuration is not supported as flag.
131134
# HELP cloudflare_r2_storage_total_bytes Total storage used by R2
132135
```
133136

137+
### Edge Errors by Path Metric (Opt-in)
138+
139+
The `cloudflare_zone_edge_errors_by_path` metric tracks edge errors (4xx/5xx) by request path. This enables path-based filtering in alerts to exclude known-noisy endpoints while catching real issues.
140+
141+
**Disabled by default** due to high cardinality. Enable with `ENABLE_EDGE_ERRORS_BY_PATH=true`.
142+
143+
Paths are normalized to reduce cardinality (e.g., `/users/123` → `/users/:id`, UUIDs → `:uuid`).
144+
134145
## Helm chart repository
135146

136147
To deploy the exporter into Kubernetes, we recommend using our manager Helm repository:

cloudflare.go

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,24 @@ type cloudflareResponseLogpushZone struct {
8585
} `json:"viewer"`
8686
}
8787

88+
// cloudflareResponseEdgeErrorsByPath is the decode target for the
// edge-errors-by-path GraphQL query: a viewer wrapping one entry per zone.
type cloudflareResponseEdgeErrorsByPath struct {
	Viewer struct {
		Zones []zoneRespEdgeErrorsByPath `json:"zones"`
	} `json:"viewer"`
}
93+
94+
// zoneRespEdgeErrorsByPath holds the httpRequestsAdaptiveGroups rows returned
// for a single zone by the edge-errors-by-path query.
type zoneRespEdgeErrorsByPath struct {
	// ZoneTag identifies the zone; callers use it to look up the zone name
	// and account.
	ZoneTag string `json:"zoneTag"`
	// HTTPRequestsAdaptiveGroups has one element per combination of the
	// dimensions below.
	HTTPRequestsAdaptiveGroups []struct {
		// Count is the group's "count" field (presumably the number of
		// matching requests in the queried window — confirm against the
		// Cloudflare dataset docs).
		Count uint64 `json:"count"`
		Dimensions struct {
			EdgeResponseStatus    uint16 `json:"edgeResponseStatus"`
			ClientRequestHTTPHost string `json:"clientRequestHTTPHost"`
			ClientRequestPath     string `json:"clientRequestPath"`
		} `json:"dimensions"`
	} `json:"httpRequestsAdaptiveGroups"`
}
105+
88106
type logpushResponse struct {
89107
LogpushHealthAdaptiveGroups []struct {
90108
Count uint64 `json:"count"`
@@ -861,6 +879,53 @@ func fetchLogpushZone(zoneIDs []string) (*cloudflareResponseLogpushZone, error)
861879
return &resp, nil
862880
}
863881

882+
// fetchEdgeErrorsByPath queries httpRequestsAdaptiveGroups for the given zones,
// restricted to responses with edgeResponseStatus >= 400 inside the window
// returned by GetTimeRange, grouped by status, host and request path.
//
// It returns the decoded response, or nil and the client error (which is also
// logged here) when the request fails.
func fetchEdgeErrorsByPath(zoneIDs []string) (*cloudflareResponseEdgeErrorsByPath, error) {
	request := graphql.NewRequest(`
		query ($zoneIDs: [String!], $mintime: Time!, $maxtime: Time!, $limit: Int!) {
			viewer {
				zones(filter: { zoneTag_in: $zoneIDs }) {
					zoneTag
					httpRequestsAdaptiveGroups(
						limit: $limit
						filter: {
							datetime_geq: $mintime
							datetime_lt: $maxtime
							edgeResponseStatus_geq: 400
						}
					) {
						count
						dimensions {
							edgeResponseStatus
							clientRequestHTTPHost
							clientRequestPath
						}
					}
				}
			}
		}
	`)

	// NOTE(review): the variable names suggest a one-minute window ending at
	// "now" — confirm against GetTimeRange.
	now, now1mAgo := GetTimeRange()
	request.Var("limit", gqlQueryLimit)
	request.Var("maxtime", now)
	request.Var("mintime", now1mAgo)
	request.Var("zoneIDs", zoneIDs)

	// The read lock is held until the function returns, i.e. for the whole
	// duration of the request.
	gql.Mu.RLock()
	defer gql.Mu.RUnlock()

	// Bound the request by cftimeout.
	ctx, cancel := context.WithTimeout(context.Background(), cftimeout)
	defer cancel()

	var resp cloudflareResponseEdgeErrorsByPath
	if err := gql.Client.Run(ctx, request, &resp); err != nil {
		log.Errorf("failed to fetch edge errors by path, err:%v", err)
		return nil, err
	}

	return &resp, nil
}
928+
864929
func fetchR2Account(accountID string) (*cloudflareResponseR2Account, error) {
865930
request := graphql.NewRequest(`query($accountID: String!, $limit: Int!, $date: String!) {
866931
viewer {

main.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,9 @@ func fetchMetrics() {
146146

147147
wg.Add(1)
148148
go fetchLogpushAnalyticsForZone(filteredZones, &wg)
149+
150+
wg.Add(1)
151+
go fetchEdgeErrorsByPathAnalytics(filteredZones, &wg)
149152
} else if zoneCount > cfgraphqlreqlimit {
150153
for s := 0; s < zoneCount; s += cfgraphqlreqlimit {
151154
e := s + cfgraphqlreqlimit
@@ -163,6 +166,9 @@ func fetchMetrics() {
163166

164167
wg.Add(1)
165168
go fetchLogpushAnalyticsForZone(filteredZones[s:e], &wg)
169+
170+
wg.Add(1)
171+
go fetchEdgeErrorsByPathAnalytics(filteredZones[s:e], &wg)
166172
}
167173
}
168174

@@ -287,6 +293,10 @@ func main() {
287293
viper.BindEnv("enable_pprof")
288294
viper.SetDefault("enable_pprof", false)
289295

296+
flags.Bool("enable_edge_errors_by_path", false, "enable edge errors by path metric (high cardinality)")
297+
viper.BindEnv("enable_edge_errors_by_path")
298+
viper.SetDefault("enable_edge_errors_by_path", false)
299+
290300
viper.BindPFlags(flags)
291301

292302
logLevel := viper.GetString("log_level")

prometheus.go

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ const (
6363
tunnelHealthStatusMetricName MetricName = "cloudflare_tunnel_health_status"
6464
tunnelConnectorInfoMetricName MetricName = "cloudflare_tunnel_connector_info"
6565
tunnelConnectorActiveConnectionsMetricName MetricName = "cloudflare_tunnel_connector_active_connections"
66+
zoneEdgeErrorsByPathMetricName MetricName = "cloudflare_zone_edge_errors_by_path"
6667
)
6768

6869
type MetricsSet map[MetricName]struct{}
@@ -334,6 +335,11 @@ var (
334335
Name: tunnelConnectorActiveConnectionsMetricName.String(),
335336
Help: "Reports number of active connections for a Cloudflare Tunnel connector",
336337
}, []string{"account", "tunnel_id", "client_id"})
338+
339+
zoneEdgeErrorsByPath = prometheus.NewCounterVec(prometheus.CounterOpts{
340+
Name: zoneEdgeErrorsByPathMetricName.String(),
341+
Help: "Number of edge errors (4xx and 5xx) by request path",
342+
}, []string{"zone", "account", "status", "host", "path"})
337343
)
338344

339345
func buildAllMetricsSet() MetricsSet {
@@ -377,6 +383,7 @@ func buildAllMetricsSet() MetricsSet {
377383
allMetricsSet.Add(tunnelHealthStatusMetricName)
378384
allMetricsSet.Add(tunnelConnectorInfoMetricName)
379385
allMetricsSet.Add(tunnelConnectorActiveConnectionsMetricName)
386+
allMetricsSet.Add(zoneEdgeErrorsByPathMetricName)
380387
return allMetricsSet
381388
}
382389

@@ -523,6 +530,9 @@ func mustRegisterMetrics(deniedMetrics MetricsSet) {
523530
if !deniedMetrics.Has(tunnelConnectorActiveConnectionsMetricName) {
524531
prometheus.MustRegister(tunnelConnectorActiveConnections)
525532
}
533+
if !deniedMetrics.Has(zoneEdgeErrorsByPathMetricName) {
534+
prometheus.MustRegister(zoneEdgeErrorsByPath)
535+
}
526536
}
527537

528538
func fetchLoadblancerPoolsHealth(account cfaccounts.Account, wg *sync.WaitGroup) {
@@ -903,6 +913,54 @@ func addHTTPAdaptiveGroups(z *zoneResp, name string, account string) {
903913
}
904914
}
905915

916+
func fetchEdgeErrorsByPathAnalytics(zones []cfzones.Zone, wg *sync.WaitGroup) {
917+
defer wg.Done()
918+
919+
if !viper.GetBool("enable_edge_errors_by_path") {
920+
return
921+
}
922+
923+
if viper.GetBool("free_tier") {
924+
return
925+
}
926+
927+
zoneIDs := extractZoneIDs(zones)
928+
if len(zoneIDs) == 0 {
929+
return
930+
}
931+
932+
r, err := fetchEdgeErrorsByPath(zoneIDs)
933+
if err != nil {
934+
log.Error("failed to fetch edge errors by path: ", err)
935+
return
936+
}
937+
938+
for _, z := range r.Viewer.Zones {
939+
name, account := findZoneAccountName(zones, z.ZoneTag)
940+
addEdgeErrorsByPath(&z, name, account)
941+
}
942+
}
943+
944+
func addEdgeErrorsByPath(z *zoneRespEdgeErrorsByPath, name string, account string) {
945+
if len(z.HTTPRequestsAdaptiveGroups) == 0 {
946+
return
947+
}
948+
949+
label := prometheus.Labels{"zone": name, "account": account}
950+
zoneEdgeErrorsByPath.DeletePartialMatch(label)
951+
952+
for _, g := range z.HTTPRequestsAdaptiveGroups {
953+
zoneEdgeErrorsByPath.With(
954+
prometheus.Labels{
955+
"zone": name,
956+
"account": account,
957+
"status": strconv.Itoa(int(g.Dimensions.EdgeResponseStatus)),
958+
"host": g.Dimensions.ClientRequestHTTPHost,
959+
"path": normalizePath(g.Dimensions.ClientRequestPath),
960+
}).Add(float64(g.Count))
961+
}
962+
}
963+
906964
func fetchLoadBalancerAnalytics(zones []cfzones.Zone, wg *sync.WaitGroup) {
907965
defer wg.Done()
908966

utils.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ package main
22

33
import (
44
"encoding/json"
5+
"regexp"
6+
"strings"
57
"time"
68

79
"github.com/spf13/viper"
@@ -21,3 +23,33 @@ func jsonStringToMap(fields string) (map[string]interface{}, error) {
2123
err := json.Unmarshal([]byte(fields), &extraFields)
2224
return extraFields, err
2325
}
26+
27+
// Patterns that classify a whole path segment as a dynamic identifier.
var (
	// numericIDPattern matches segments made only of decimal digits.
	numericIDPattern = regexp.MustCompile(`^[0-9]+$`)
	// uuidPattern matches the canonical 8-4-4-4-12 hex UUID form, any case.
	uuidPattern = regexp.MustCompile(`^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$`)
	// hexIDPattern matches hex tokens of at least eight characters.
	hexIDPattern = regexp.MustCompile(`^[0-9a-fA-F]{8,}$`)
)

// normalizePath collapses dynamic path segments so the result can be used as a
// bounded-cardinality label value.
//
// The query string (everything from the first '?') is dropped. Each
// '/'-separated segment that is all digits or a hex token of eight or more
// characters becomes ":id", and a UUID segment becomes ":uuid". Empty
// segments (leading, trailing or doubled slashes) are preserved, and "" and
// "/" are returned unchanged.
func normalizePath(path string) string {
	if path == "" || path == "/" {
		return path
	}

	// Cut at the first '?' directly; no intermediate slice is needed just to
	// discard the query string.
	if q := strings.IndexByte(path, '?'); q >= 0 {
		path = path[:q]
	}

	segments := strings.Split(path, "/")
	for i, segment := range segments {
		switch {
		case segment == "":
			// Keep empty segments so slashes survive the round trip.
		case uuidPattern.MatchString(segment):
			segments[i] = ":uuid"
		case numericIDPattern.MatchString(segment), hexIDPattern.MatchString(segment):
			segments[i] = ":id"
		}
	}

	return strings.Join(segments, "/")
}

utils_test.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
package main
2+
3+
import "testing"
4+
5+
func TestNormalizePath(t *testing.T) {
6+
tests := []struct {
7+
input string
8+
expected string
9+
}{
10+
{"", ""},
11+
{"/", "/"},
12+
{"/health", "/health"},
13+
{"/api/v1/users", "/api/v1/users"},
14+
{"/users/123", "/users/:id"},
15+
{"/users/123/orders/456", "/users/:id/orders/:id"},
16+
{"/orders/550e8400-e29b-41d4-a716-446655440000", "/orders/:uuid"},
17+
{"/items/5f3a2b1c9d", "/items/:id"},
18+
{"/search?q=test&page=1", "/search"},
19+
{"/api/v1/users/123?include=orders", "/api/v1/users/:id"},
20+
}
21+
22+
for _, tt := range tests {
23+
t.Run(tt.input, func(t *testing.T) {
24+
result := normalizePath(tt.input)
25+
if result != tt.expected {
26+
t.Errorf("normalizePath(%q) = %q, want %q", tt.input, result, tt.expected)
27+
}
28+
})
29+
}
30+
}

0 commit comments

Comments
 (0)