Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ Required authentication scopes:
- `Zone/Firewall Services:Read` is required to fetch zone rule name for `cloudflare_zone_firewall_events_count` metric
- `Account/Account Rulesets:Read` is required to fetch account rule name for `cloudflare_zone_firewall_events_count` metric
- `Account:Load Balancing: Monitors and Pools:Read` is required to fetch pools origin health status `cloudflare_pool_origin_health_status` metric
- `Account/Workers KV Storage:Read` is required for KV metrics
- `Cloudflare Tunnel Read` is required to fetch Cloudflare Tunnel (Cloudflare Zero Trust) metrics

To authenticate this way, only set `CF_API_TOKEN` (omit `CF_API_EMAIL` and `CF_API_KEY`)
Expand Down Expand Up @@ -127,6 +128,8 @@ Note: `ZONE_<name>` configuration is not supported as flag.
# HELP cloudflare_r2_operation_count Number of operations performed by R2
# HELP cloudflare_r2_storage_bytes Storage used by R2
# HELP cloudflare_r2_storage_total_bytes Total storage used by R2
# HELP cloudflare_kv_requests_count Number of KV operations by namespace and action type
# HELP cloudflare_kv_latency KV operation latency quantiles (milliseconds)
```

## Helm chart repository
Expand Down
99 changes: 99 additions & 0 deletions cloudflare.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (

cf "github.com/cloudflare/cloudflare-go/v4"
cfaccounts "github.com/cloudflare/cloudflare-go/v4/accounts"
cfkv "github.com/cloudflare/cloudflare-go/v4/kv"
cfload_balancers "github.com/cloudflare/cloudflare-go/v4/load_balancers"
cfpagination "github.com/cloudflare/cloudflare-go/v4/packages/pagination"
cfrulesets "github.com/cloudflare/cloudflare-go/v4/rulesets"
Expand Down Expand Up @@ -299,6 +300,30 @@ type lbResp struct {
ZoneTag string `json:"zoneTag"`
}

// cloudflareResponseKV is the decode target for the KV operations GraphQL
// query issued by fetchKVOperations. Its shape mirrors the query's
// viewer -> accounts nesting; each account entry is a kvAccountResp.
type cloudflareResponseKV struct {
Viewer struct {
Accounts []kvAccountResp `json:"accounts"`
} `json:"viewer"`
}

// kvAccountResp is the per-account part of the KV operations GraphQL response.
// Each element of KvOperationsAdaptiveGroups carries the namespaceId/actionType
// dimensions it was reported for, the summed request count, and the latency
// quantiles in milliseconds.
type kvAccountResp struct {
	KvOperationsAdaptiveGroups []struct {
		Dimensions struct {
			NamespaceID string `json:"namespaceId"`
			ActionType  string `json:"actionType"`
		} `json:"dimensions"`
		Sum struct {
			Requests uint64 `json:"requests"`
		} `json:"sum"`
		// Quantiles are float64 on purpose: the values end up in Prometheus
		// gauges, which are float64. Decoding into float32 first and widening
		// afterwards introduces rounding noise (0.1 becomes
		// 0.10000000149011612) in the exported samples.
		Quantiles struct {
			LatencyMsP50  float64 `json:"latencyMsP50"`
			LatencyMsP75  float64 `json:"latencyMsP75"`
			LatencyMsP99  float64 `json:"latencyMsP99"`
			LatencyMsP999 float64 `json:"latencyMsP999"`
		} `json:"quantiles"`
	} `json:"kvOperationsAdaptiveGroups"`
}

type cloudflareResponseDNSFirewall struct {
Viewer struct {
Accounts []dnsFirewallAccountResp `json:"accounts"`
Expand Down Expand Up @@ -1085,6 +1110,80 @@ func filterNonFreePlanZones(zones []cfzones.Zone) (filteredZones []cfzones.Zone)
return
}

// fetchKVNamespaces lists the Workers KV namespaces of the given account and
// returns a map from namespace ID to namespace title.
//
// Any listing failure, on the first page or on a later one, is returned as the
// error together with a nil map, so callers never receive a silently
// truncated result. Logging is left to the caller.
func fetchKVNamespaces(accountID string) (map[string]string, error) {
	ctx, cancel := context.WithTimeout(context.Background(), cftimeout)
	defer cancel()

	page := cfclient.KV.Namespaces.ListAutoPaging(ctx, cfkv.NamespaceListParams{
		AccountID: cf.F(accountID),
	})
	if err := page.Err(); err != nil {
		return nil, err
	}

	namespaceMap := make(map[string]string)
	for page.Next() {
		ns := page.Current()
		// Seeing an ID a second time means the pager is handing back entries it
		// already returned; stop instead of looping until the timeout. The
		// result map doubles as the "seen" set.
		if _, seen := namespaceMap[ns.ID]; seen {
			log.Errorf("fetchKVNamespaces: duplicate namespace ID detected (%s), breaking loop", ns.ID)
			break
		}
		namespaceMap[ns.ID] = ns.Title
	}

	// Next() reports false when fetching a later page fails, so a pagination
	// error can only be observed here, after the loop has ended.
	if err := page.Err(); err != nil {
		return nil, err
	}

	return namespaceMap, nil
}

// fetchKVOperations runs the kvOperationsAdaptiveGroups GraphQL query for one
// account and returns the decoded response.
//
// The time window comes from GetTimeRange and the row limit from
// gqlQueryLimit. The shared GraphQL client is used under gql.Mu's read lock,
// and the request is bounded by cftimeout. On failure the error is logged and
// returned with a nil response.
func fetchKVOperations(accountID string) (*cloudflareResponseKV, error) {
	const query = `
query ($accountID: String!, $mintime: Time!, $maxtime: Time!, $limit: Int!) {
viewer {
accounts(filter: {accountTag: $accountID}) {
kvOperationsAdaptiveGroups(limit: $limit, filter: {datetime_geq: $mintime, datetime_lt: $maxtime}) {
dimensions {
namespaceId
actionType
}
sum {
requests
}
quantiles {
latencyMsP50
latencyMsP75
latencyMsP99
latencyMsP999
}
}
}
}
}`

	req := graphql.NewRequest(query)

	// GetTimeRange yields the upper bound first, then the lower bound.
	maxTime, minTime := GetTimeRange()
	req.Var("limit", gqlQueryLimit)
	req.Var("maxtime", maxTime)
	req.Var("mintime", minTime)
	req.Var("accountID", accountID)

	// Take the lock before starting the timeout so that time spent waiting
	// for the lock does not eat into the request deadline.
	gql.Mu.RLock()
	defer gql.Mu.RUnlock()

	ctx, cancel := context.WithTimeout(context.Background(), cftimeout)
	defer cancel()

	result := new(cloudflareResponseKV)
	err := gql.Client.Run(ctx, req, result)
	if err != nil {
		log.Errorf("error fetching KV operations, err:%v", err)
		return nil, err
	}

	return result, nil
}

func fetchDNSFirewallTotals(accountID string) (*cloudflareResponseDNSFirewall, error) {
request := graphql.NewRequest(`
query ($accountID: string, $mintime: Time!, $maxtime: Time!, $limit: Int!) {
Expand Down
1 change: 1 addition & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ func fetchMetrics(deniedMetricsSet MetricsSet) {

for _, a := range accounts {
go fetchWorkerAnalytics(a, &wg)
go fetchKVAnalytics(a, &wg, deniedMetricsSet)
go fetchLogpushAnalyticsForAccount(a, &wg)
go fetchR2StorageForAccount(a, &wg)
go fetchLoadblancerPoolsHealth(a, &wg)
Expand Down
58 changes: 58 additions & 0 deletions prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ const (
tunnelConnectorInfoMetricName MetricName = "cloudflare_tunnel_connector_info"
tunnelConnectorActiveConnectionsMetricName MetricName = "cloudflare_tunnel_connector_active_connections"
dnsFirewallQueryCountMetricName MetricName = "cloudflare_dns_firewall_query_count"
kvRequestsMetricName MetricName = "cloudflare_kv_requests_count"
kvLatencyMetricName MetricName = "cloudflare_kv_latency"
)

type MetricsSet map[MetricName]struct{}
Expand Down Expand Up @@ -336,6 +338,16 @@ var (
Help: "Reports number of active connections for a Cloudflare Tunnel connector",
}, []string{"account", "tunnel_id", "client_id"})

// kvRequests exposes cloudflare_kv_requests_count, labelled by namespace_name,
// action_type and account. fetchKVAnalytics sets it to the summed request
// count of each KV operations group, so the value describes the most recently
// fetched window rather than a running total.
kvRequests = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: kvRequestsMetricName.String(),
Help: "Number of KV operations by namespace and action type",
}, []string{"namespace_name", "action_type", "account"})

// kvLatency exposes cloudflare_kv_latency in milliseconds, labelled by
// namespace_name, action_type, account and quantile. fetchKVAnalytics writes
// one series per quantile, using the label values "P50", "P75", "P99" and
// "P999".
kvLatency = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: kvLatencyMetricName.String(),
Help: "KV operation latency quantiles (milliseconds)",
}, []string{"namespace_name", "action_type", "account", "quantile"})

dnsFirewallQueryCount = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: dnsFirewallQueryCountMetricName.String(),
Help: "DNS Firewall query count by query type and response code",
Expand Down Expand Up @@ -388,6 +400,8 @@ func buildAllMetricsSet() MetricsSet {
allMetricsSet.Add(tunnelConnectorInfoMetricName)
allMetricsSet.Add(tunnelConnectorActiveConnectionsMetricName)
allMetricsSet.Add(dnsFirewallQueryCountMetricName)
allMetricsSet.Add(kvRequestsMetricName)
allMetricsSet.Add(kvLatencyMetricName)
return allMetricsSet
}

Expand Down Expand Up @@ -537,6 +551,12 @@ func mustRegisterMetrics(deniedMetrics MetricsSet) {
if !deniedMetrics.Has(dnsFirewallQueryCountMetricName) {
prometheus.MustRegister(dnsFirewallQueryCount)
}
if !deniedMetrics.Has(kvRequestsMetricName) {
prometheus.MustRegister(kvRequests)
}
if !deniedMetrics.Has(kvLatencyMetricName) {
prometheus.MustRegister(kvLatency)
}
}

func fetchLoadblancerPoolsHealth(account cfaccounts.Account, wg *sync.WaitGroup) {
Expand Down Expand Up @@ -607,6 +627,44 @@ func fetchWorkerAnalytics(account cfaccounts.Account, wg *sync.WaitGroup) {
}
}

// fetchKVAnalytics publishes Workers KV metrics for one account. It resolves
// namespace IDs to titles via fetchKVNamespaces, fetches per-namespace
// operation statistics via fetchKVOperations, and sets the kvRequests and
// kvLatency gauges, skipping any metric present in deniedMetricsSet. If either
// fetch fails, the error is logged and no KV metric is updated for this run.
//
// The function registers itself on wg (Add(1) on entry, Done on return).
// NOTE(review): it is started with `go` from fetchMetrics, so Add runs inside
// the new goroutine; confirm the caller cannot reach wg.Wait before that.
func fetchKVAnalytics(account cfaccounts.Account, wg *sync.WaitGroup, deniedMetricsSet MetricsSet) {
wg.Add(1)
defer wg.Done()

namespaceMap, err := fetchKVNamespaces(account.ID)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🍹 From experience, this list is going to get longer and longer the more native hosting stores we have. I think it might be fine for now, but for example, we do the auto paging for custom hostnames and it takes ~5 minutes to get through 90k custom hostnames.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, you're right. Do you think we need to fetch KV namespaces on every scrape? I think that might be overkill. Maybe we can cache them on boot and then refresh the cache on a separate interval (different from the scrape interval).

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Only add namespace_name if the id matches a hardcoded list - need mapping table.
  2. Only add namespace_id if the id matches a hardcoded list - simple slice.
  3. Add both namespace_name and namespace_id if id matches a hardcoded list – need mapping table.

All other namespaces that are not on the hardcoded list don't have the label because it's high-cardinality.

Copy link
Member Author

@rtalvarez rtalvarez Mar 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added changes to implement 2) in e3fb3b0

A quick caveat: Prometheus requires the same set of labels on every series of a metric, so we can't omit `namespace_id` entirely. I've defaulted it to `other`, so all namespaces that are not on the list get bucketed there.

// Without the ID -> title map, give up for this run; the operations query is
// not attempted.
if err != nil {
log.Error("failed to fetch KV namespaces for account ", account.ID, ": ", err)
return
}

r, err := fetchKVOperations(account.ID)
if err != nil {
log.Error("failed to fetch KV operations for account ", account.ID, ": ", err)
return
}

// The account label is the account name lower-cased, with spaces replaced by dashes.
accountName := strings.ToLower(strings.ReplaceAll(account.Name, " ", "-"))

for _, a := range r.Viewer.Accounts {
for _, kv := range a.KvOperationsAdaptiveGroups {
// Use the namespace title when known; otherwise fall back to the raw
// namespace ID so the series still gets a non-empty label.
namespaceName := namespaceMap[kv.Dimensions.NamespaceID]
if namespaceName == "" {
namespaceName = kv.Dimensions.NamespaceID
}

// Each metric is written only if it is not in the denied set.
if !deniedMetricsSet.Has(kvRequestsMetricName) {
kvRequests.With(prometheus.Labels{"namespace_name": namespaceName, "action_type": kv.Dimensions.ActionType, "account": accountName}).Set(float64(kv.Sum.Requests))
}
if !deniedMetricsSet.Has(kvLatencyMetricName) {
// One series per reported quantile, distinguished by the "quantile" label.
kvLatency.With(prometheus.Labels{"namespace_name": namespaceName, "action_type": kv.Dimensions.ActionType, "account": accountName, "quantile": "P50"}).Set(float64(kv.Quantiles.LatencyMsP50))
kvLatency.With(prometheus.Labels{"namespace_name": namespaceName, "action_type": kv.Dimensions.ActionType, "account": accountName, "quantile": "P75"}).Set(float64(kv.Quantiles.LatencyMsP75))
kvLatency.With(prometheus.Labels{"namespace_name": namespaceName, "action_type": kv.Dimensions.ActionType, "account": accountName, "quantile": "P99"}).Set(float64(kv.Quantiles.LatencyMsP99))
kvLatency.With(prometheus.Labels{"namespace_name": namespaceName, "action_type": kv.Dimensions.ActionType, "account": accountName, "quantile": "P999"}).Set(float64(kv.Quantiles.LatencyMsP999))
}
}
}
}

func fetchLogpushAnalyticsForAccount(account cfaccounts.Account, wg *sync.WaitGroup) {
wg.Add(1)
defer wg.Done()
Expand Down
Loading