Skip to content

Commit f0becfe

Browse files
committed
feat(): optionally group by colo for zone stats
1 parent dbcd222 commit f0becfe

File tree

4 files changed

+346
-3
lines changed

4 files changed

+346
-3
lines changed
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
{
2+
"data": {
3+
"viewer": {
4+
"zones": [
5+
{
6+
"zoneTag": "zone123abc",
7+
"httpRequestsAdaptiveGroups": [
8+
{
9+
"count": 150,
10+
"dimensions": {
11+
"edgeResponseStatus": 200,
12+
"clientRequestHTTPHost": "example.com",
13+
"coloCode": "SJC"
14+
},
15+
"quantiles": {
16+
"edgeTimeToFirstByteMsP50": 45.2,
17+
"edgeTimeToFirstByteMsP95": 120.5,
18+
"edgeTimeToFirstByteMsP99": 250.8,
19+
"originResponseDurationMsP50": 120.0,
20+
"originResponseDurationMsP95": 350.0,
21+
"originResponseDurationMsP99": 680.0
22+
}
23+
},
24+
{
25+
"count": 25,
26+
"dimensions": {
27+
"edgeResponseStatus": 304,
28+
"clientRequestHTTPHost": "example.com",
29+
"coloCode": "IAD"
30+
},
31+
"quantiles": {
32+
"edgeTimeToFirstByteMsP50": 12.1,
33+
"edgeTimeToFirstByteMsP95": 35.4,
34+
"edgeTimeToFirstByteMsP99": 78.2,
35+
"originResponseDurationMsP50": 0.0,
36+
"originResponseDurationMsP95": 0.0,
37+
"originResponseDurationMsP99": 0.0
38+
}
39+
},
40+
{
41+
"count": 10,
42+
"dimensions": {
43+
"edgeResponseStatus": 404,
44+
"clientRequestHTTPHost": "example.com",
45+
"coloCode": "LHR"
46+
},
47+
"quantiles": {
48+
"edgeTimeToFirstByteMsP50": 8.5,
49+
"edgeTimeToFirstByteMsP95": 22.3,
50+
"edgeTimeToFirstByteMsP99": 45.1,
51+
"originResponseDurationMsP50": 25.0,
52+
"originResponseDurationMsP95": 60.0,
53+
"originResponseDurationMsP99": 120.0
54+
}
55+
},
56+
{
57+
"count": 5,
58+
"dimensions": {
59+
"edgeResponseStatus": 500,
60+
"clientRequestHTTPHost": "example.com",
61+
"coloCode": "SJC"
62+
},
63+
"quantiles": {
64+
"edgeTimeToFirstByteMsP50": 350.2,
65+
"edgeTimeToFirstByteMsP95": 890.4,
66+
"edgeTimeToFirstByteMsP99": 1250.6,
67+
"originResponseDurationMsP50": 800.0,
68+
"originResponseDurationMsP95": 2200.0,
69+
"originResponseDurationMsP99": 4500.0
70+
}
71+
},
72+
{
73+
"count": 80,
74+
"dimensions": {
75+
"edgeResponseStatus": 200,
76+
"clientRequestHTTPHost": "api.example.com",
77+
"coloCode": "ORD"
78+
},
79+
"quantiles": {
80+
"edgeTimeToFirstByteMsP50": 65.8,
81+
"edgeTimeToFirstByteMsP95": 180.2,
82+
"edgeTimeToFirstByteMsP99": 320.5,
83+
"originResponseDurationMsP50": 180.0,
84+
"originResponseDurationMsP95": 520.0,
85+
"originResponseDurationMsP99": 950.0
86+
}
87+
},
88+
{
89+
"count": 3,
90+
"dimensions": {
91+
"edgeResponseStatus": 401,
92+
"clientRequestHTTPHost": "api.example.com",
93+
"coloCode": "DFW"
94+
},
95+
"quantiles": {
96+
"edgeTimeToFirstByteMsP50": 15.3,
97+
"edgeTimeToFirstByteMsP95": 42.1,
98+
"edgeTimeToFirstByteMsP99": 88.7,
99+
"originResponseDurationMsP50": 40.0,
100+
"originResponseDurationMsP95": 110.0,
101+
"originResponseDurationMsP99": 220.0
102+
}
103+
}
104+
]
105+
},
106+
{
107+
"zoneTag": "zone456def",
108+
"httpRequestsAdaptiveGroups": [
109+
{
110+
"count": 200,
111+
"dimensions": {
112+
"edgeResponseStatus": 200,
113+
"clientRequestHTTPHost": "myapp.io",
114+
"coloCode": "FRA"
115+
},
116+
"quantiles": {
117+
"edgeTimeToFirstByteMsP50": 55.4,
118+
"edgeTimeToFirstByteMsP95": 145.8,
119+
"edgeTimeToFirstByteMsP99": 285.2,
120+
"originResponseDurationMsP50": 150.0,
121+
"originResponseDurationMsP95": 420.0,
122+
"originResponseDurationMsP99": 780.0
123+
}
124+
},
125+
{
126+
"count": 15,
127+
"dimensions": {
128+
"edgeResponseStatus": 503,
129+
"clientRequestHTTPHost": "myapp.io",
130+
"coloCode": "NRT"
131+
},
132+
"quantiles": {
133+
"edgeTimeToFirstByteMsP50": 5200.1,
134+
"edgeTimeToFirstByteMsP95": 8500.5,
135+
"edgeTimeToFirstByteMsP99": 12000.8,
136+
"originResponseDurationMsP50": 15000.0,
137+
"originResponseDurationMsP95": 25000.0,
138+
"originResponseDurationMsP99": 30000.0
139+
}
140+
}
141+
]
142+
}
143+
]
144+
}
145+
},
146+
"errors": null
147+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Per-colo latency quantiles for eyeball HTTP requests, grouped by request
# host and Cloudflare colo (data center) code.
#
# The grouping intentionally does NOT include edgeResponseStatus: the consumer
# publishes gauges keyed only by zone / host / colo / quantile, so any extra
# dimension would yield several groups for the same gauge series, and each
# group would overwrite the previous one (last group wins).
query GetZoneHttpRequestsByColoQuery($zoneIDs: [String!], $datetimeStart: Time!, $datetimeEnd: Time!, $limit: Int!) {
  viewer {
    zones(filter: { zoneTag_in: $zoneIDs }) {
      zoneTag
      httpRequestsAdaptiveGroups(limit: $limit, filter: {
        datetime_geq: $datetimeStart,
        datetime_lt: $datetimeEnd,
        requestSource_in: ["eyeball"]
      }) {
        count
        dimensions {
          clientRequestHTTPHost
          coloCode
        }
        quantiles {
          edgeTimeToFirstByteMsP50
          edgeTimeToFirstByteMsP95
          edgeTimeToFirstByteMsP99
          originResponseDurationMsP50
          originResponseDurationMsP95
          originResponseDurationMsP99
        }
      }
    }
  }
}

src/gql.rs

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use chrono::NaiveDateTime;
33
use graphql_client::{GraphQLQuery, Response};
44
use opentelemetry_proto::tonic::metrics::v1::Metric;
55
use prometheus::{CounterVec, GaugeVec, Opts, Registry};
6+
use std::collections::HashSet;
67
use std::error::Error;
78
use worker::console_log;
89

@@ -50,6 +51,13 @@ pub struct GetQueueOperationsAnalyticsQuery;
5051
)]
5152
pub struct GetZoneHttpRequestsQuery;
5253

54+
/// Marker type for the per-colo zone HTTP requests GraphQL query.
///
/// The `GraphQLQuery` derive binds this type to the query document at
/// `gql/zone_http_requests_by_colo_query.graphql`, checked against
/// `gql/schema.graphql`. Elsewhere in this file the request is built with
/// `GetZoneHttpRequestsByColoQuery::build_query` and the response is decoded
/// into `get_zone_http_requests_by_colo_query::ResponseData`.
#[derive(GraphQLQuery)]
55+
#[graphql(
56+
schema_path = "gql/schema.graphql",
57+
query_path = "gql/zone_http_requests_by_colo_query.graphql"
58+
)]
59+
pub struct GetZoneHttpRequestsByColoQuery;
60+
5361
#[allow(non_camel_case_types)]
5462
type float32 = f32;
5563

@@ -880,3 +888,122 @@ pub async fn do_get_zone_http_requests_query(
880888
timestamp_nanos,
881889
))
882890
}
891+
892+
pub async fn do_get_zone_http_requests_by_colo_query(
893+
cloudflare_api_url: &String,
894+
cloudflare_api_key: &String,
895+
variables: get_zone_http_requests_by_colo_query::Variables,
896+
debug_logging: bool,
897+
fallback_timestamp_nanos: u64,
898+
colo_hosts: &HashSet<String>,
899+
) -> Result<Vec<Metric>, Box<dyn Error>> {
900+
let request_body = GetZoneHttpRequestsByColoQuery::build_query(variables);
901+
if debug_logging {
902+
console_log!(
903+
"[ZoneHttpRequestsByColo] GraphQL request: {}",
904+
serde_json::to_string_pretty(&request_body).unwrap_or_default()
905+
);
906+
}
907+
let client = reqwest::Client::new();
908+
let res = client
909+
.post(cloudflare_api_url)
910+
.bearer_auth(cloudflare_api_key)
911+
.json(&request_body)
912+
.send()
913+
.await?;
914+
915+
if !res.status().is_success() {
916+
console_log!(
917+
"[ZoneHttpRequestsByColo] GraphQL query failed: {:?}",
918+
res.status()
919+
);
920+
return Err(Box::new(res.error_for_status().unwrap_err()));
921+
}
922+
923+
let response_text = res.text().await?;
924+
if debug_logging {
925+
console_log!("[ZoneHttpRequestsByColo] GraphQL response: {}", response_text);
926+
}
927+
let response_body: Response<get_zone_http_requests_by_colo_query::ResponseData> =
928+
serde_json::from_str(&response_text)?;
929+
if response_body.errors.is_some() {
930+
console_log!(
931+
"[ZoneHttpRequestsByColo] GraphQL query failed: {:?}",
932+
response_body.errors
933+
);
934+
return Err(Box::new(worker::Error::JsError("graphql".parse().unwrap())));
935+
}
936+
let response_data: get_zone_http_requests_by_colo_query::ResponseData =
937+
response_body.data.expect("missing response data");
938+
939+
let registry = Registry::new();
940+
let zone_ttfb_opts = Opts::new(
941+
"cloudflare_zone_edge_ttfb_ms",
942+
"Edge Time To First Byte - milliseconds",
943+
);
944+
let zone_ttfb =
945+
GaugeVec::new(zone_ttfb_opts, &["zone", "host", "colo", "quantile"]).unwrap();
946+
registry.register(Box::new(zone_ttfb.clone())).unwrap();
947+
948+
let zone_origin_response_duration_opts = Opts::new(
949+
"cloudflare_zone_origin_response_duration_ms",
950+
"Origin Response Duration - milliseconds",
951+
);
952+
let zone_origin_response_duration = GaugeVec::new(
953+
zone_origin_response_duration_opts,
954+
&["zone", "host", "colo", "quantile"],
955+
)
956+
.unwrap();
957+
registry
958+
.register(Box::new(zone_origin_response_duration.clone()))
959+
.unwrap();
960+
961+
let last_datetime: Option<Time> = None;
962+
for zone in response_data.viewer.unwrap().zones.iter() {
963+
let zone_tag = zone.zone_tag.clone();
964+
for group in zone.http_requests_adaptive_groups.iter() {
965+
let dimensions = group.dimensions.as_ref().unwrap();
966+
let host = dimensions.client_request_http_host.clone();
967+
let colo = dimensions.colo_code.clone();
968+
969+
// Only emit metrics for configured hosts
970+
if !colo_hosts.contains(&host) {
971+
continue;
972+
}
973+
974+
if let Some(quantiles) = &group.quantiles {
975+
zone_ttfb
976+
.with_label_values(&[zone_tag.as_str(), host.as_str(), colo.as_str(), "P50"])
977+
.set(quantiles.edge_time_to_first_byte_ms_p50);
978+
zone_ttfb
979+
.with_label_values(&[zone_tag.as_str(), host.as_str(), colo.as_str(), "P95"])
980+
.set(quantiles.edge_time_to_first_byte_ms_p95);
981+
zone_ttfb
982+
.with_label_values(&[zone_tag.as_str(), host.as_str(), colo.as_str(), "P99"])
983+
.set(quantiles.edge_time_to_first_byte_ms_p99);
984+
985+
zone_origin_response_duration
986+
.with_label_values(&[zone_tag.as_str(), host.as_str(), colo.as_str(), "P50"])
987+
.set(quantiles.origin_response_duration_ms_p50);
988+
zone_origin_response_duration
989+
.with_label_values(&[zone_tag.as_str(), host.as_str(), colo.as_str(), "P95"])
990+
.set(quantiles.origin_response_duration_ms_p95);
991+
zone_origin_response_duration
992+
.with_label_values(&[zone_tag.as_str(), host.as_str(), colo.as_str(), "P99"])
993+
.set(quantiles.origin_response_duration_ms_p99);
994+
}
995+
}
996+
}
997+
998+
let timestamp_nanos: u64 = last_datetime
999+
.map(|datetime| {
1000+
let datetime: NaiveDateTime = NaiveDateTime::parse_from_str(&datetime, "%+").unwrap();
1001+
datetime.and_utc().timestamp_nanos_opt().unwrap_or(0) as u64
1002+
})
1003+
.unwrap_or(fallback_timestamp_nanos);
1004+
1005+
Ok(prometheus_registry_to_opentelemetry_metrics(
1006+
registry,
1007+
timestamp_nanos,
1008+
))
1009+
}

src/lib.rs

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,11 @@ use prost::Message;
77
use crate::gql::{
88
do_get_d1_analytics_query, do_get_durableobjects_analytics_query,
99
do_get_queue_backlog_analytics_query, do_get_queue_operations_analytics_query,
10-
do_get_workers_analytics_query, do_get_zone_http_requests_query, get_d1_analytics_query,
10+
do_get_workers_analytics_query, do_get_zone_http_requests_by_colo_query,
11+
do_get_zone_http_requests_query, get_d1_analytics_query,
1112
get_durable_objects_analytics_query, get_queue_backlog_analytics_query,
1213
get_queue_operations_analytics_query, get_workers_analytics_query,
13-
get_zone_http_requests_query,
14+
get_zone_http_requests_by_colo_query, get_zone_http_requests_query,
1415
};
1516
use worker::js_sys::Uint8Array;
1617
use worker::wasm_bindgen::JsValue;
@@ -237,7 +238,7 @@ async fn do_trigger(env: Env) -> Result<()> {
237238
&cloudflare_api_url,
238239
&cloudflare_api_key,
239240
get_zone_http_requests_query::Variables {
240-
zone_i_ds: Some(zone_ids),
241+
zone_i_ds: Some(zone_ids.clone()),
241242
datetime_start: start.to_rfc3339(),
242243
datetime_end: end.to_rfc3339(),
243244
limit: 9999,
@@ -260,6 +261,47 @@ async fn do_trigger(env: Env) -> Result<()> {
260261
return Err(Error::JsError(e.to_string()));
261262
}
262263
};
264+
265+
// Optionally query with coloCode dimension for configured hosts
266+
if let Ok(zone_colo_hosts_var) = env.var("ZONE_COLO_HOSTS") {
267+
let colo_hosts: std::collections::HashSet<String> = zone_colo_hosts_var
268+
.to_string()
269+
.split(',')
270+
.map(|s| s.trim().to_string())
271+
.filter(|s| !s.is_empty())
272+
.collect();
273+
274+
if !colo_hosts.is_empty() {
275+
let result = do_get_zone_http_requests_by_colo_query(
276+
&cloudflare_api_url,
277+
&cloudflare_api_key,
278+
get_zone_http_requests_by_colo_query::Variables {
279+
zone_i_ds: Some(zone_ids),
280+
datetime_start: start.to_rfc3339(),
281+
datetime_end: end.to_rfc3339(),
282+
limit: 9999,
283+
},
284+
debug_logging,
285+
fallback_timestamp_nanos,
286+
&colo_hosts,
287+
)
288+
.await;
289+
match result {
290+
Ok(metrics) => {
291+
for metric in metrics {
292+
all_metrics.push(metric);
293+
}
294+
}
295+
Err(e) => {
296+
console_log!(
297+
"Querying Cloudflare API for zone HTTP requests by colo failed: {:?}",
298+
e
299+
);
300+
return Err(Error::JsError(e.to_string()));
301+
}
302+
};
303+
}
304+
}
263305
}
264306
}
265307

0 commit comments

Comments
 (0)