Skip to content

Commit a9d9406

Browse files
authored
SD-11577: add support for kv metrics (#10)
* SD-11577: add support for kv metrics * SD-11577: code review from agent * SD-11577: add support for bucketing metrics based on args
1 parent 1e8dfad commit a9d9406

File tree

4 files changed

+146
-0
lines changed

4 files changed

+146
-0
lines changed

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ The exporter can be configured using env variables or command flags.
6666
| `SCRAPE_DELAY` | scrape delay in seconds, default `300` |
6767
| `SCRAPE_INTERVAL` | scrape interval in seconds (will query cloudflare every SCRAPE_INTERVAL seconds), default `60` |
6868
| `METRICS_DENYLIST` | (Optional) cloudflare-exporter metrics to not export, comma delimited list of cloudflare-exporter metrics. If not set, all metrics are exported |
69+
| `KV_NAMESPACE_IDS` | (Optional) KV namespace IDs to track individually, comma delimited. Unlisted namespaces are aggregated as `other` |
6970
| `ENABLE_PPROF` | (Optional) enable pprof profiling endpoints at `/debug/pprof/`. Accepts `true` or `false`, default `false`. **Warning**: Only enable in development/debugging environments |
7071
| `ZONE_<NAME>` | `DEPRECATED since 0.0.5` (optional) Zone ID. Add zones you want to scrape by adding env vars in this format. You can find the zone ids in Cloudflare dashboards. |
7172
| `LOG_LEVEL` | Set loglevel. Options are error, warn, info, debug. default `error` |
@@ -84,6 +85,7 @@ Corresponding flags:
8485
-metrics_path="/metrics": path for metrics, default /metrics
8586
-scrape_delay=300: scrape delay in seconds, defaults to 300
8687
-scrape_interval=60: scrape interval in seconds, defaults to 60
88+
-kv_namespace_ids="": KV namespace IDs to track individually, comma delimited
8789
-metrics_denylist="": cloudflare-exporter metrics to not export, comma delimited list
8890
-enable_pprof=false: enable pprof profiling endpoints at /debug/pprof/
8991
-log_level="error": log level(error,warn,info,debug)
@@ -127,6 +129,8 @@ Note: `ZONE_<name>` configuration is not supported as flag.
127129
# HELP cloudflare_r2_operation_count Number of operations performed by R2
128130
# HELP cloudflare_r2_storage_bytes Storage used by R2
129131
# HELP cloudflare_r2_storage_total_bytes Total storage used by R2
132+
# HELP cloudflare_kv_requests_count Number of KV operations by namespace and action type
133+
# HELP cloudflare_kv_latency KV operation latency quantiles (milliseconds)
130134
```
131135

132136
## Helm chart repository

cloudflare.go

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,30 @@ type lbResp struct {
299299
ZoneTag string `json:"zoneTag"`
300300
}
301301

302+
type cloudflareResponseKV struct {
303+
Viewer struct {
304+
Accounts []kvAccountResp `json:"accounts"`
305+
} `json:"viewer"`
306+
}
307+
308+
// kvAccountResp is the per-account part of the KV operations GraphQL
// response. Each element of KvOperationsAdaptiveGroups is one group keyed
// by the requested dimensions (namespace ID and action type).
type kvAccountResp struct {
	KvOperationsAdaptiveGroups []struct {
		// Dimensions identifies the group.
		Dimensions struct {
			NamespaceID string `json:"namespaceId"`
			ActionType  string `json:"actionType"`
		} `json:"dimensions"`
		// Sum carries the request total for the group.
		Sum struct {
			Requests uint64 `json:"requests"`
		} `json:"sum"`
		// Quantiles carries latency quantiles in milliseconds. They are
		// decoded as float64 because they are exported as float64 gauge
		// values; going through float32 first would add rounding noise
		// (12.3 would be exported as 12.300000190734863).
		Quantiles struct {
			LatencyMsP50  float64 `json:"latencyMsP50"`
			LatencyMsP75  float64 `json:"latencyMsP75"`
			LatencyMsP99  float64 `json:"latencyMsP99"`
			LatencyMsP999 float64 `json:"latencyMsP999"`
		} `json:"quantiles"`
	} `json:"kvOperationsAdaptiveGroups"`
}
325+
302326
type cloudflareResponseDNSFirewall struct {
303327
Viewer struct {
304328
Accounts []dnsFirewallAccountResp `json:"accounts"`
@@ -1085,6 +1109,51 @@ func filterNonFreePlanZones(zones []cfzones.Zone) (filteredZones []cfzones.Zone)
10851109
return
10861110
}
10871111

1112+
func fetchKVOperations(accountID string) (*cloudflareResponseKV, error) {
1113+
request := graphql.NewRequest(`
1114+
query ($accountID: String!, $mintime: Time!, $maxtime: Time!, $limit: Int!) {
1115+
viewer {
1116+
accounts(filter: {accountTag: $accountID}) {
1117+
kvOperationsAdaptiveGroups(limit: $limit, filter: {datetime_geq: $mintime, datetime_lt: $maxtime}) {
1118+
dimensions {
1119+
namespaceId
1120+
actionType
1121+
}
1122+
sum {
1123+
requests
1124+
}
1125+
quantiles {
1126+
latencyMsP50
1127+
latencyMsP75
1128+
latencyMsP99
1129+
latencyMsP999
1130+
}
1131+
}
1132+
}
1133+
}
1134+
}`)
1135+
1136+
now, now1mAgo := GetTimeRange()
1137+
request.Var("limit", gqlQueryLimit)
1138+
request.Var("maxtime", now)
1139+
request.Var("mintime", now1mAgo)
1140+
request.Var("accountID", accountID)
1141+
1142+
gql.Mu.RLock()
1143+
defer gql.Mu.RUnlock()
1144+
1145+
ctx, cancel := context.WithTimeout(context.Background(), cftimeout)
1146+
defer cancel()
1147+
1148+
var resp cloudflareResponseKV
1149+
if err := gql.Client.Run(ctx, request, &resp); err != nil {
1150+
log.Errorf("error fetching KV operations, err:%v", err)
1151+
return nil, err
1152+
}
1153+
1154+
return &resp, nil
1155+
}
1156+
10881157
func fetchDNSFirewallTotals(accountID string) (*cloudflareResponseDNSFirewall, error) {
10891158
request := graphql.NewRequest(`
10901159
query ($accountID: string, $mintime: Time!, $maxtime: Time!, $limit: Int!) {

main.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ var (
2525
cftimeout time.Duration
2626
gql *GraphQL
2727
log = logrus.New()
28+
29+
// kvTrackedNamespaces is the set of KV namespace IDs that get their own
30+
// namespace_id label. All other namespaces are aggregated under "other".
31+
kvTrackedNamespaces map[string]struct{}
2832
)
2933

3034
// var (
@@ -110,6 +114,7 @@ func fetchMetrics(deniedMetricsSet MetricsSet) {
110114

111115
for _, a := range accounts {
112116
go fetchWorkerAnalytics(a, &wg)
117+
go fetchKVAnalytics(a, &wg, deniedMetricsSet)
113118
go fetchLogpushAnalyticsForAccount(a, &wg)
114119
go fetchR2StorageForAccount(a, &wg)
115120
go fetchLoadblancerPoolsHealth(a, &wg)
@@ -173,6 +178,18 @@ func runExporter() {
173178
log.Debugf("Metrics set: %v", metricsSet)
174179
mustRegisterMetrics(metricsSet)
175180

181+
// Build tracked KV namespace set from config.
182+
kvTrackedNamespaces = make(map[string]struct{})
183+
if ids := viper.GetString("kv_namespace_ids"); ids != "" {
184+
for _, id := range strings.Split(ids, ",") {
185+
id = strings.TrimSpace(id)
186+
if id != "" {
187+
kvTrackedNamespaces[id] = struct{}{}
188+
}
189+
}
190+
}
191+
log.Infof("Tracking %d KV namespace IDs", len(kvTrackedNamespaces))
192+
176193
scrapeInterval := time.Duration(viper.GetInt("scrape_interval")) * time.Second
177194
log.Info("Scrape interval set to ", scrapeInterval)
178195

@@ -256,6 +273,10 @@ func main() {
256273
viper.BindEnv("cf_timeout")
257274
viper.SetDefault("cf_timeout", 10*time.Second)
258275

276+
flags.String("kv_namespace_ids", "", "KV namespace IDs to track individually, comma delimited. Unlisted namespaces are aggregated as 'other'")
277+
viper.BindEnv("kv_namespace_ids")
278+
viper.SetDefault("kv_namespace_ids", "")
279+
259280
flags.String("metrics_denylist", "", "metrics to not expose, comma delimited list")
260281
viper.BindEnv("metrics_denylist")
261282
viper.SetDefault("metrics_denylist", "")

prometheus.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ const (
6464
tunnelConnectorInfoMetricName MetricName = "cloudflare_tunnel_connector_info"
6565
tunnelConnectorActiveConnectionsMetricName MetricName = "cloudflare_tunnel_connector_active_connections"
6666
dnsFirewallQueryCountMetricName MetricName = "cloudflare_dns_firewall_query_count"
67+
kvRequestsMetricName MetricName = "cloudflare_kv_requests_count"
68+
kvLatencyMetricName MetricName = "cloudflare_kv_latency"
6769
)
6870

6971
type MetricsSet map[MetricName]struct{}
@@ -336,6 +338,16 @@ var (
336338
Help: "Reports number of active connections for a Cloudflare Tunnel connector",
337339
}, []string{"account", "tunnel_id", "client_id"})
338340

341+
kvRequests = prometheus.NewGaugeVec(prometheus.GaugeOpts{
342+
Name: kvRequestsMetricName.String(),
343+
Help: "Number of KV operations by namespace and action type",
344+
}, []string{"namespace_id", "action_type", "account"})
345+
346+
kvLatency = prometheus.NewGaugeVec(prometheus.GaugeOpts{
347+
Name: kvLatencyMetricName.String(),
348+
Help: "KV operation latency quantiles (milliseconds)",
349+
}, []string{"namespace_id", "action_type", "account", "quantile"})
350+
339351
dnsFirewallQueryCount = prometheus.NewGaugeVec(prometheus.GaugeOpts{
340352
Name: dnsFirewallQueryCountMetricName.String(),
341353
Help: "DNS Firewall query count by query type and response code",
@@ -388,6 +400,8 @@ func buildAllMetricsSet() MetricsSet {
388400
allMetricsSet.Add(tunnelConnectorInfoMetricName)
389401
allMetricsSet.Add(tunnelConnectorActiveConnectionsMetricName)
390402
allMetricsSet.Add(dnsFirewallQueryCountMetricName)
403+
allMetricsSet.Add(kvRequestsMetricName)
404+
allMetricsSet.Add(kvLatencyMetricName)
391405
return allMetricsSet
392406
}
393407

@@ -537,6 +551,12 @@ func mustRegisterMetrics(deniedMetrics MetricsSet) {
537551
if !deniedMetrics.Has(dnsFirewallQueryCountMetricName) {
538552
prometheus.MustRegister(dnsFirewallQueryCount)
539553
}
554+
if !deniedMetrics.Has(kvRequestsMetricName) {
555+
prometheus.MustRegister(kvRequests)
556+
}
557+
if !deniedMetrics.Has(kvLatencyMetricName) {
558+
prometheus.MustRegister(kvLatency)
559+
}
540560
}
541561

542562
func fetchLoadblancerPoolsHealth(account cfaccounts.Account, wg *sync.WaitGroup) {
@@ -607,6 +627,38 @@ func fetchWorkerAnalytics(account cfaccounts.Account, wg *sync.WaitGroup) {
607627
}
608628
}
609629

630+
func fetchKVAnalytics(account cfaccounts.Account, wg *sync.WaitGroup, deniedMetricsSet MetricsSet) {
631+
wg.Add(1)
632+
defer wg.Done()
633+
634+
r, err := fetchKVOperations(account.ID)
635+
if err != nil {
636+
log.Error("failed to fetch KV operations for account ", account.ID, ": ", err)
637+
return
638+
}
639+
640+
accountName := strings.ToLower(strings.ReplaceAll(account.Name, " ", "-"))
641+
642+
for _, a := range r.Viewer.Accounts {
643+
for _, kv := range a.KvOperationsAdaptiveGroups {
644+
nsID := kv.Dimensions.NamespaceID
645+
if _, tracked := kvTrackedNamespaces[nsID]; !tracked {
646+
nsID = "other"
647+
}
648+
649+
if !deniedMetricsSet.Has(kvRequestsMetricName) {
650+
kvRequests.With(prometheus.Labels{"namespace_id": nsID, "action_type": kv.Dimensions.ActionType, "account": accountName}).Add(float64(kv.Sum.Requests))
651+
}
652+
if !deniedMetricsSet.Has(kvLatencyMetricName) {
653+
kvLatency.With(prometheus.Labels{"namespace_id": nsID, "action_type": kv.Dimensions.ActionType, "account": accountName, "quantile": "P50"}).Set(float64(kv.Quantiles.LatencyMsP50))
654+
kvLatency.With(prometheus.Labels{"namespace_id": nsID, "action_type": kv.Dimensions.ActionType, "account": accountName, "quantile": "P75"}).Set(float64(kv.Quantiles.LatencyMsP75))
655+
kvLatency.With(prometheus.Labels{"namespace_id": nsID, "action_type": kv.Dimensions.ActionType, "account": accountName, "quantile": "P99"}).Set(float64(kv.Quantiles.LatencyMsP99))
656+
kvLatency.With(prometheus.Labels{"namespace_id": nsID, "action_type": kv.Dimensions.ActionType, "account": accountName, "quantile": "P999"}).Set(float64(kv.Quantiles.LatencyMsP999))
657+
}
658+
}
659+
}
660+
}
661+
610662
func fetchLogpushAnalyticsForAccount(account cfaccounts.Account, wg *sync.WaitGroup) {
611663
wg.Add(1)
612664
defer wg.Done()

0 commit comments

Comments
 (0)