Skip to content

Commit 7eb1a87

Browse files
committed
SD-11577: add support for bucketing metrics based on args
1 parent 59f6e49 commit 7eb1a87

File tree

4 files changed

+28
-84
lines changed

4 files changed

+28
-84
lines changed

README.md

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ Required authentication scopes:
3535
- `Zone/Firewall Services:Read` is required to fetch zone rule name for `cloudflare_zone_firewall_events_count` metric
3636
- `Account/Account Rulesets:Read` is required to fetch account rule name for `cloudflare_zone_firewall_events_count` metric
3737
- `Account:Load Balancing: Monitors and Pools:Read` is required to fetch pool origin health status for the `cloudflare_pool_origin_health_status` metric
38-
- `Account/Workers KV Storage:Read` is required for KV metrics
3938
- `Cloudflare Tunnel Read` is required to fetch Cloudflare Tunnel (Cloudflare Zero Trust) metrics
4039

4140
To authenticate this way, only set `CF_API_TOKEN` (omit `CF_API_EMAIL` and `CF_API_KEY`)
@@ -67,7 +66,7 @@ The exporter can be configured using env variables or command flags.
6766
| `SCRAPE_DELAY` | scrape delay in seconds, default `300` |
6867
| `SCRAPE_INTERVAL` | scrape interval in seconds (will query cloudflare every SCRAPE_INTERVAL seconds), default `60` |
6968
| `METRICS_DENYLIST` | (Optional) comma-delimited list of cloudflare-exporter metrics to not export. If not set, all metrics are exported |
70-
| `KV_CACHE_INTERVAL` | (Optional) KV namespace cache refresh interval, default `5m` |
69+
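| `KV_NAMESPACE_IDS` | (Optional) comma-delimited list of KV namespace IDs to report individually under the `namespace_id` label. All unlisted namespaces are aggregated under `namespace_id="other"`. If not set, every namespace is reported as `other` |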
7170
| `ENABLE_PPROF` | (Optional) enable pprof profiling endpoints at `/debug/pprof/`. Accepts `true` or `false`, default `false`. **Warning**: Only enable in development/debugging environments |
7271
| `ZONE_<NAME>` | `DEPRECATED since 0.0.5` (optional) Zone ID. Add zones you want to scrape by adding env vars in this format. You can find the zone ids in Cloudflare dashboards. |
7372
| `LOG_LEVEL` | Set the log level. Options are error, warn, info, debug; default `error` |
@@ -86,7 +85,7 @@ Corresponding flags:
8685
-metrics_path="/metrics": path for metrics, default /metrics
8786
-scrape_delay=300: scrape delay in seconds, defaults to 300
8887
-scrape_interval=60: scrape interval in seconds, defaults to 60
89-
-kv_cache_interval="5m": KV namespace cache refresh interval, default 5 minutes
88+
-kv_namespace_ids="": KV namespace IDs to track individually, comma delimited
9089
-metrics_denylist="": cloudflare-exporter metrics to not export, comma delimited list
9190
-enable_pprof=false: enable pprof profiling endpoints at /debug/pprof/
9291
-log_level="error": log level(error,warn,info,debug)

cloudflare.go

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ import (
77

88
cf "github.com/cloudflare/cloudflare-go/v4"
99
cfaccounts "github.com/cloudflare/cloudflare-go/v4/accounts"
10-
cfkv "github.com/cloudflare/cloudflare-go/v4/kv"
1110
cfload_balancers "github.com/cloudflare/cloudflare-go/v4/load_balancers"
1211
cfpagination "github.com/cloudflare/cloudflare-go/v4/packages/pagination"
1312
cfrulesets "github.com/cloudflare/cloudflare-go/v4/rulesets"
@@ -1110,35 +1109,6 @@ func filterNonFreePlanZones(zones []cfzones.Zone) (filteredZones []cfzones.Zone)
11101109
return
11111110
}
11121111

1113-
func fetchKVNamespaces(accountID string) (map[string]string, error) {
1114-
namespaceMap := make(map[string]string)
1115-
ctx, cancel := context.WithTimeout(context.Background(), cftimeout)
1116-
defer cancel()
1117-
page := cfclient.KV.Namespaces.ListAutoPaging(ctx, cfkv.NamespaceListParams{
1118-
AccountID: cf.F(accountID),
1119-
})
1120-
if page.Err() != nil {
1121-
return nil, page.Err()
1122-
}
1123-
1124-
seenIDs := make(map[string]struct{})
1125-
for page.Next() {
1126-
if page.Err() != nil {
1127-
log.Errorf("error during paging KV namespaces: %v", page.Err())
1128-
break
1129-
}
1130-
ns := page.Current()
1131-
if _, exists := seenIDs[ns.ID]; exists {
1132-
log.Errorf("fetchKVNamespaces: duplicate namespace ID detected (%s), breaking loop", ns.ID)
1133-
break
1134-
}
1135-
seenIDs[ns.ID] = struct{}{}
1136-
namespaceMap[ns.ID] = ns.Title
1137-
}
1138-
1139-
return namespaceMap, nil
1140-
}
1141-
11421112
func fetchKVOperations(accountID string) (*cloudflareResponseKV, error) {
11431113
request := graphql.NewRequest(`
11441114
query ($accountID: String!, $mintime: Time!, $maxtime: Time!, $limit: Int!) {

main.go

Lines changed: 16 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ import (
77
"runtime"
88
"strings"
99
"sync"
10-
"sync/atomic"
1110
"time"
1211

1312
"github.com/nelkinda/health-go"
@@ -27,9 +26,9 @@ var (
2726
gql *GraphQL
2827
log = logrus.New()
2928

30-
// kvNamespaceCache stores accountID -> namespaceID -> name.
31-
// Atomically replaced by refreshKVNamespaceCache, read by getKVNamespaceMap.
32-
kvNamespaceCache atomic.Pointer[map[string]map[string]string]
29+
// kvTrackedNamespaces is the set of KV namespace IDs that get their own
30+
// namespace_id label. All other namespaces are aggregated under "other".
31+
kvTrackedNamespaces map[string]struct{}
3332
)
3433

3534
// var (
@@ -156,30 +155,6 @@ func fetchMetrics(deniedMetricsSet MetricsSet) {
156155
wg.Wait()
157156
}
158157

159-
func refreshKVNamespaceCache() {
160-
accounts := fetchAccounts()
161-
newCache := make(map[string]map[string]string, len(accounts))
162-
for _, a := range accounts {
163-
nsMap, err := fetchKVNamespaces(a.ID)
164-
if err != nil {
165-
log.Warnf("failed to refresh KV namespace cache for account %s: %v", a.ID, err)
166-
continue
167-
}
168-
newCache[a.ID] = nsMap
169-
}
170-
171-
kvNamespaceCache.Store(&newCache)
172-
log.Info("KV namespace cache refreshed")
173-
}
174-
175-
func getKVNamespaceMap(accountID string) map[string]string {
176-
cache := kvNamespaceCache.Load()
177-
if cache == nil {
178-
return nil
179-
}
180-
return (*cache)[accountID]
181-
}
182-
183158
func runExporter() {
184159
cfgMetricsPath := viper.GetString("metrics_path")
185160

@@ -203,15 +178,17 @@ func runExporter() {
203178
log.Debugf("Metrics set: %v", metricsSet)
204179
mustRegisterMetrics(metricsSet)
205180

206-
// Populate KV namespace cache at boot, then refresh on interval.
207-
refreshKVNamespaceCache()
208-
kvCacheInterval := viper.GetDuration("kv_cache_interval")
209-
log.Info("KV namespace cache refresh interval set to ", kvCacheInterval)
210-
go func() {
211-
for range time.NewTicker(kvCacheInterval).C {
212-
refreshKVNamespaceCache()
181+
// Build tracked KV namespace set from config.
182+
kvTrackedNamespaces = make(map[string]struct{})
183+
if ids := viper.GetString("kv_namespace_ids"); ids != "" {
184+
for _, id := range strings.Split(ids, ",") {
185+
id = strings.TrimSpace(id)
186+
if id != "" {
187+
kvTrackedNamespaces[id] = struct{}{}
188+
}
213189
}
214-
}()
190+
}
191+
log.Infof("Tracking %d KV namespace IDs", len(kvTrackedNamespaces))
215192

216193
scrapeInterval := time.Duration(viper.GetInt("scrape_interval")) * time.Second
217194
log.Info("Scrape interval set to ", scrapeInterval)
@@ -296,9 +273,9 @@ func main() {
296273
viper.BindEnv("cf_timeout")
297274
viper.SetDefault("cf_timeout", 10*time.Second)
298275

299-
flags.Duration("kv_cache_interval", 5*time.Minute, "KV namespace cache refresh interval, default 5 minutes")
300-
viper.BindEnv("kv_cache_interval")
301-
viper.SetDefault("kv_cache_interval", 5*time.Minute)
276+
flags.String("kv_namespace_ids", "", "KV namespace IDs to track individually, comma delimited. Unlisted namespaces are aggregated as 'other'")
277+
viper.BindEnv("kv_namespace_ids")
278+
viper.SetDefault("kv_namespace_ids", "")
302279

303280
flags.String("metrics_denylist", "", "metrics to not expose, comma delimited list")
304281
viper.BindEnv("metrics_denylist")

prometheus.go

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -341,12 +341,12 @@ var (
341341
kvRequests = prometheus.NewGaugeVec(prometheus.GaugeOpts{
342342
Name: kvRequestsMetricName.String(),
343343
Help: "Number of KV operations by namespace and action type",
344-
}, []string{"namespace_name", "action_type", "account"})
344+
}, []string{"namespace_id", "action_type", "account"})
345345

346346
kvLatency = prometheus.NewGaugeVec(prometheus.GaugeOpts{
347347
Name: kvLatencyMetricName.String(),
348348
Help: "KV operation latency quantiles (milliseconds)",
349-
}, []string{"namespace_name", "action_type", "account", "quantile"})
349+
}, []string{"namespace_id", "action_type", "account", "quantile"})
350350

351351
dnsFirewallQueryCount = prometheus.NewGaugeVec(prometheus.GaugeOpts{
352352
Name: dnsFirewallQueryCountMetricName.String(),
@@ -631,8 +631,6 @@ func fetchKVAnalytics(account cfaccounts.Account, wg *sync.WaitGroup, deniedMetr
631631
wg.Add(1)
632632
defer wg.Done()
633633

634-
namespaceMap := getKVNamespaceMap(account.ID)
635-
636634
r, err := fetchKVOperations(account.ID)
637635
if err != nil {
638636
log.Error("failed to fetch KV operations for account ", account.ID, ": ", err)
@@ -643,19 +641,19 @@ func fetchKVAnalytics(account cfaccounts.Account, wg *sync.WaitGroup, deniedMetr
643641

644642
for _, a := range r.Viewer.Accounts {
645643
for _, kv := range a.KvOperationsAdaptiveGroups {
646-
namespaceName := namespaceMap[kv.Dimensions.NamespaceID]
647-
if namespaceName == "" {
648-
namespaceName = kv.Dimensions.NamespaceID
644+
nsID := kv.Dimensions.NamespaceID
645+
if _, tracked := kvTrackedNamespaces[nsID]; !tracked {
646+
nsID = "other"
649647
}
650648

651649
if !deniedMetricsSet.Has(kvRequestsMetricName) {
652-
kvRequests.With(prometheus.Labels{"namespace_name": namespaceName, "action_type": kv.Dimensions.ActionType, "account": accountName}).Set(float64(kv.Sum.Requests))
650+
kvRequests.With(prometheus.Labels{"namespace_id": nsID, "action_type": kv.Dimensions.ActionType, "account": accountName}).Add(float64(kv.Sum.Requests))
653651
}
654652
if !deniedMetricsSet.Has(kvLatencyMetricName) {
655-
kvLatency.With(prometheus.Labels{"namespace_name": namespaceName, "action_type": kv.Dimensions.ActionType, "account": accountName, "quantile": "P50"}).Set(float64(kv.Quantiles.LatencyMsP50))
656-
kvLatency.With(prometheus.Labels{"namespace_name": namespaceName, "action_type": kv.Dimensions.ActionType, "account": accountName, "quantile": "P75"}).Set(float64(kv.Quantiles.LatencyMsP75))
657-
kvLatency.With(prometheus.Labels{"namespace_name": namespaceName, "action_type": kv.Dimensions.ActionType, "account": accountName, "quantile": "P99"}).Set(float64(kv.Quantiles.LatencyMsP99))
658-
kvLatency.With(prometheus.Labels{"namespace_name": namespaceName, "action_type": kv.Dimensions.ActionType, "account": accountName, "quantile": "P999"}).Set(float64(kv.Quantiles.LatencyMsP999))
653+
kvLatency.With(prometheus.Labels{"namespace_id": nsID, "action_type": kv.Dimensions.ActionType, "account": accountName, "quantile": "P50"}).Set(float64(kv.Quantiles.LatencyMsP50))
654+
kvLatency.With(prometheus.Labels{"namespace_id": nsID, "action_type": kv.Dimensions.ActionType, "account": accountName, "quantile": "P75"}).Set(float64(kv.Quantiles.LatencyMsP75))
655+
kvLatency.With(prometheus.Labels{"namespace_id": nsID, "action_type": kv.Dimensions.ActionType, "account": accountName, "quantile": "P99"}).Set(float64(kv.Quantiles.LatencyMsP99))
656+
kvLatency.With(prometheus.Labels{"namespace_id": nsID, "action_type": kv.Dimensions.ActionType, "account": accountName, "quantile": "P999"}).Set(float64(kv.Quantiles.LatencyMsP999))
659657
}
660658
}
661659
}

0 commit comments

Comments
 (0)