Skip to content

Commit 4a00297

Browse files
authored
feat: add by_country_subdivision1_asn queries (#81)
This diff adds the `by_country_subdivision1_asn` queries and starts publishing the results of some of these queries to the GitHub cache in the v0.1.0 release.
1 parent 5d684fb commit 4a00297

14 files changed

+165
-20
lines changed

data/generate_data.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ def generate_for_period(
5151
"country_city",
5252
"country_city_asn",
5353
"country_subdivision1",
54+
"country_subdivision1_asn",
5455
)
5556

5657
directions = ("downloads", "uploads")

data/ghcache.json

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,14 @@
3232
"sha256": "87017d792903ef9a61c92db012e3705ca825c17b610459cb84fb80b6370e25a2",
3333
"url": "https://github.com/m-lab/iqb/releases/download/v0.1.0/87017d792903__cache__v1__20241001T000000Z__20241101T000000Z__downloads_by_country_subdivision1__stats.json"
3434
},
35+
"cache/v1/20241001T000000Z/20241101T000000Z/downloads_by_country_subdivision1_asn/data.parquet": {
36+
"sha256": "2e91a55c8dd3e6e45d9a6c3fbac0cf7c311b59a404526c0880c9ea17f4a6bd6c",
37+
"url": "https://github.com/m-lab/iqb/releases/download/v0.1.0/2e91a55c8dd3__cache__v1__20241001T000000Z__20241101T000000Z__downloads_by_country_subdivision1_asn__data.parquet"
38+
},
39+
"cache/v1/20241001T000000Z/20241101T000000Z/downloads_by_country_subdivision1_asn/stats.json": {
40+
"sha256": "81d08c5faa2a426bd072a3f666f7e117430f7a24578c821e5aa6d9687b06f75a",
41+
"url": "https://github.com/m-lab/iqb/releases/download/v0.1.0/81d08c5faa2a__cache__v1__20241001T000000Z__20241101T000000Z__downloads_by_country_subdivision1_asn__stats.json"
42+
},
3543
"cache/v1/20241001T000000Z/20241101T000000Z/uploads_by_country_asn/data.parquet": {
3644
"sha256": "fdace39ccd4a7a4036f454be102193472a3a67ef7d506c49e2afe2d3e959effc",
3745
"url": "https://github.com/m-lab/iqb/releases/download/v0.1.0/fdace39ccd4a__cache__v1__20241001T000000Z__20241101T000000Z__uploads_by_country_asn__data.parquet"
@@ -64,6 +72,14 @@
6472
"sha256": "3b144feca8d741cde0bc4c11a6fba0255f932869d9c9b9adc571a7385c9ed2e9",
6573
"url": "https://github.com/m-lab/iqb/releases/download/v0.1.0/3b144feca8d7__cache__v1__20241001T000000Z__20241101T000000Z__uploads_by_country_subdivision1__stats.json"
6674
},
75+
"cache/v1/20241001T000000Z/20241101T000000Z/uploads_by_country_subdivision1_asn/data.parquet": {
76+
"sha256": "1e363dfc4ccf552fe654b33f4d6615746e9892720c64e03dc7e34874e03a5e78",
77+
"url": "https://github.com/m-lab/iqb/releases/download/v0.1.0/1e363dfc4ccf__cache__v1__20241001T000000Z__20241101T000000Z__uploads_by_country_subdivision1_asn__data.parquet"
78+
},
79+
"cache/v1/20241001T000000Z/20241101T000000Z/uploads_by_country_subdivision1_asn/stats.json": {
80+
"sha256": "adbb8a8779ed8c1b787c12f7f6098b3d5617fa658abfad1011700d84ac74376a",
81+
"url": "https://github.com/m-lab/iqb/releases/download/v0.1.0/adbb8a8779ed__cache__v1__20241001T000000Z__20241101T000000Z__uploads_by_country_subdivision1_asn__stats.json"
82+
},
6783
"cache/v1/20251001T000000Z/20251101T000000Z/downloads_by_country_asn/data.parquet": {
6884
"sha256": "4900bb6b7eea84086d65afc8f593b759862d893bdeb694430178dca87217f1e3",
6985
"url": "https://github.com/m-lab/iqb/releases/download/v0.1.0/4900bb6b7eea__cache__v1__20251001T000000Z__20251101T000000Z__downloads_by_country_asn__data.parquet"

library/src/iqb/queries/downloads_by_country.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,8 @@ SELECT
6464
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(5)] as loss_p95,
6565
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(1)] as loss_p99
6666
FROM
67-
-- TODO(bassosimone): switch to union tables `measurement-lab.ndt.ndt7_union`
68-
-- when they have been blessed as the new stable tables.
67+
-- TODO(bassosimone): current unified_downloads/unified_uploads tables lack BYOS
68+
-- support. We'll eventually need to switch to better tables.
6969
`measurement-lab.ndt.unified_downloads`
7070
WHERE
7171
date >= "{START_DATE}" AND date < "{END_DATE}"

library/src/iqb/queries/downloads_by_country_asn.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@ SELECT
6666
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(5)] as loss_p95,
6767
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(1)] as loss_p99
6868
FROM
69-
-- TODO(bassosimone): switch to union tables `measurement-lab.ndt.ndt7_union`
70-
-- when they have been blessed as the new stable tables.
69+
-- TODO(bassosimone): current unified_downloads/unified_uploads tables lack BYOS
70+
-- support. We'll eventually need to switch to better tables.
7171
`measurement-lab.ndt.unified_downloads`
7272
WHERE
7373
date >= "{START_DATE}" AND date < "{END_DATE}"

library/src/iqb/queries/downloads_by_country_city.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,8 @@ SELECT
6767
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(5)] as loss_p95,
6868
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(1)] as loss_p99
6969
FROM
70-
-- TODO(bassosimone): switch to union tables `measurement-lab.ndt.ndt7_union`
71-
-- when they have been blessed as the new stable tables.
70+
-- TODO(bassosimone): current unified_downloads/unified_uploads tables lack BYOS
71+
-- support. We'll eventually need to switch to better tables.
7272
`measurement-lab.ndt.unified_downloads`
7373
WHERE
7474
date >= "{START_DATE}" AND date < "{END_DATE}"

library/src/iqb/queries/downloads_by_country_city_asn.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,8 @@ SELECT
6969
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(5)] as loss_p95,
7070
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(1)] as loss_p99
7171
FROM
72-
-- TODO(bassosimone): switch to union tables `measurement-lab.ndt.ndt7_union`
73-
-- when they have been blessed as the new stable tables.
72+
-- TODO(bassosimone): current unified_downloads/unified_uploads tables lack BYOS
73+
-- support. We'll eventually need to switch to better tables.
7474
`measurement-lab.ndt.unified_downloads`
7575
WHERE
7676
date >= "{START_DATE}" AND date < "{END_DATE}"

library/src/iqb/queries/downloads_by_country_subdivision1.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@ SELECT
6666
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(5)] as loss_p95,
6767
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(1)] as loss_p99
6868
FROM
69-
-- TODO(bassosimone): switch to union tables `measurement-lab.ndt.ndt7_union`
70-
-- when they have been blessed as the new stable tables.
69+
-- TODO(bassosimone): current unified_downloads/unified_uploads tables lack BYOS
70+
-- support. We'll eventually need to switch to better tables.
7171
`measurement-lab.ndt.unified_downloads`
7272
WHERE
7373
date >= "{START_DATE}" AND date < "{END_DATE}"
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
SELECT
2+
client.Geo.CountryCode as country_code,
3+
client.Geo.Subdivision1ISOCode as subdivision1_iso_code,
4+
client.Geo.Subdivision1Name as subdivision1_name,
5+
client.Network.ASNumber as asn,
6+
client.Network.ASName as as_name,
7+
COUNT(*) as sample_count,
8+
9+
-- ============================================================================
10+
-- PERCENTILE LABELING CONVENTION FOR IQB QUALITY ASSESSMENT
11+
-- ============================================================================
12+
--
13+
-- For "higher is better" metrics (throughput):
14+
-- - Raw p95 = "95% of users have ≤ X Mbit/s"
15+
-- - Label: OFFSET(95) → download_p95 (standard statistical definition)
16+
-- - Interpretation: top ~5% of users have > p95 throughput
17+
--
18+
-- For "lower is better" metrics (latency, packet loss):
19+
-- - Raw p95 = "95% of users have ≤ X ms latency" (worst-case typical)
20+
-- - We want p95 to represent best-case typical (to match throughput semantics)
21+
-- - Solution: Invert labels - use raw p5 labeled as p95
22+
-- - Label: OFFSET(5) → latency_p95 (inverted!)
23+
-- - Interpretation: top ~5% of users (best latency) have < p95
24+
--
25+
-- Result: Uniform comparison logic where p95 always means "typical best
26+
-- performance" rather than "typical worst performance"
27+
--
28+
-- NOTE: This creates semantics where checking p95 thresholds asks
29+
-- "Can the top ~5% of users perform this use case?" - empirical validation
30+
-- against real data will determine if this interpretation is appropriate.
31+
-- ============================================================================
32+
33+
-- Download throughput (higher is better - NO INVERSION)
34+
-- Standard percentile labels matching statistical definition
35+
APPROX_QUANTILES(a.MeanThroughputMbps, 100)[OFFSET(1)] as download_p1,
36+
APPROX_QUANTILES(a.MeanThroughputMbps, 100)[OFFSET(5)] as download_p5,
37+
APPROX_QUANTILES(a.MeanThroughputMbps, 100)[OFFSET(10)] as download_p10,
38+
APPROX_QUANTILES(a.MeanThroughputMbps, 100)[OFFSET(25)] as download_p25,
39+
APPROX_QUANTILES(a.MeanThroughputMbps, 100)[OFFSET(50)] as download_p50,
40+
APPROX_QUANTILES(a.MeanThroughputMbps, 100)[OFFSET(75)] as download_p75,
41+
APPROX_QUANTILES(a.MeanThroughputMbps, 100)[OFFSET(90)] as download_p90,
42+
APPROX_QUANTILES(a.MeanThroughputMbps, 100)[OFFSET(95)] as download_p95,
43+
APPROX_QUANTILES(a.MeanThroughputMbps, 100)[OFFSET(99)] as download_p99,
44+
45+
-- Latency/MinRTT (lower is better - INVERTED LABELS!)
46+
-- ⚠️ OFFSET(99) = 99th percentile = worst ~1% of users → labeled as p1
47+
-- ⚠️ OFFSET(5) = 5th percentile = best ~5% of users → labeled as p95
48+
APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(99)] as latency_p1,
49+
APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(95)] as latency_p5,
50+
APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(90)] as latency_p10,
51+
APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(75)] as latency_p25,
52+
APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(50)] as latency_p50,
53+
APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(25)] as latency_p75,
54+
APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(10)] as latency_p90,
55+
APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(5)] as latency_p95,
56+
APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(1)] as latency_p99,
57+
58+
-- Packet Loss Rate (lower is better - INVERTED LABELS!)
59+
-- ⚠️ OFFSET(99) = 99th percentile = worst ~1% of users → labeled as p1
60+
-- ⚠️ OFFSET(5) = 5th percentile = best ~5% of users → labeled as p95
61+
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(99)] as loss_p1,
62+
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(95)] as loss_p5,
63+
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(90)] as loss_p10,
64+
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(75)] as loss_p25,
65+
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(50)] as loss_p50,
66+
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(25)] as loss_p75,
67+
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(10)] as loss_p90,
68+
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(5)] as loss_p95,
69+
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(1)] as loss_p99
70+
FROM
71+
-- TODO(bassosimone): current unified_downloads/unified_uploads tables lack BYOS
72+
-- support. We'll eventually need to switch to better tables.
73+
`measurement-lab.ndt.unified_downloads`
74+
WHERE
75+
date >= "{START_DATE}" AND date < "{END_DATE}"
76+
AND client.Geo.CountryCode IS NOT NULL
77+
AND client.Geo.Subdivision1ISOCode IS NOT NULL
78+
AND client.Geo.Subdivision1Name IS NOT NULL
79+
AND client.Network.ASNumber IS NOT NULL
80+
AND client.Network.ASName IS NOT NULL
81+
AND a.MeanThroughputMbps IS NOT NULL
82+
AND a.MinRTT IS NOT NULL
83+
AND a.LossRate IS NOT NULL
84+
GROUP BY country_code, subdivision1_iso_code, subdivision1_name, asn, as_name
85+
ORDER BY country_code, subdivision1_iso_code, subdivision1_name, asn, as_name

library/src/iqb/queries/uploads_by_country.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ SELECT
2424
APPROX_QUANTILES(a.MeanThroughputMbps, 100)[OFFSET(95)] as upload_p95,
2525
APPROX_QUANTILES(a.MeanThroughputMbps, 100)[OFFSET(99)] as upload_p99
2626
FROM
27-
-- TODO(bassosimone): switch to union tables `measurement-lab.ndt.ndt7_union`
28-
-- when they have been blessed as the new stable tables.
27+
-- TODO(bassosimone): current unified_downloads/unified_uploads tables lack BYOS
28+
-- support. We'll eventually need to switch to better tables.
2929
`measurement-lab.ndt.unified_uploads`
3030
WHERE
3131
date >= "{START_DATE}" AND date < "{END_DATE}"

library/src/iqb/queries/uploads_by_country_asn.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ SELECT
2626
APPROX_QUANTILES(a.MeanThroughputMbps, 100)[OFFSET(95)] as upload_p95,
2727
APPROX_QUANTILES(a.MeanThroughputMbps, 100)[OFFSET(99)] as upload_p99
2828
FROM
29-
-- TODO(bassosimone): switch to union tables `measurement-lab.ndt.ndt7_union`
30-
-- when they have been blessed as the new stable tables.
29+
-- TODO(bassosimone): current unified_downloads/unified_uploads tables lack BYOS
30+
-- support. We'll eventually need to switch to better tables.
3131
`measurement-lab.ndt.unified_uploads`
3232
WHERE
3333
date >= "{START_DATE}" AND date < "{END_DATE}"

0 commit comments

Comments
 (0)