Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 35 additions & 35 deletions data/br_2024_10.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,48 +10,48 @@
},
"metrics": {
"download_throughput_mbps": {
"p1": 0.15979623373499155,
"p5": 0.9501991252036766,
"p10": 3.101174869710966,
"p25": 15.0340700432778,
"p50": 51.9831305263177,
"p75": 158.38962702858973,
"p90": 330.3352983503099,
"p95": 456.0950392154999,
"p99": 696.5613392781584
"p1": 0.1607952665485899,
"p5": 0.9527639351896139,
"p10": 3.0995791948048126,
"p25": 15.027442481667213,
"p50": 52.05003588762424,
"p75": 157.9133050283035,
"p90": 330.56865817664186,
"p95": 456.85148065886335,
"p99": 699.4150527600466
},
"upload_throughput_mbps": {
"p1": 0.042563080079753776,
"p5": 0.07560071683921148,
"p10": 0.08980854096320207,
"p25": 5.545812099052701,
"p50": 30.78175191467136,
"p75": 88.37694460346944,
"p90": 181.64033113619195,
"p95": 255.97876412741525,
"p99": 394.3416893812533
"p1": 0.04271425174361129,
"p5": 0.07560308380999751,
"p10": 0.08981165041087474,
"p25": 5.53270523375538,
"p50": 30.774797230694343,
"p75": 88.4152374160525,
"p90": 181.77498611764298,
"p95": 255.68482970928804,
"p99": 393.05831520691316
},
"latency_ms": {
"p1": 1.394,
"p5": 3.637,
"p10": 4.958,
"p25": 9.079,
"p1": 274.874,
"p5": 234.07,
"p10": 184.463,
"p25": 52.107,
"p50": 19.953,
"p75": 52.065,
"p90": 184.738,
"p95": 234.072,
"p99": 273.0
"p75": 9.072,
"p90": 4.958,
"p95": 3.64,
"p99": 1.394
},
"packet_loss": {
"p1": 0.0,
"p5": 0.0,
"p10": 0.0,
"p25": 1.1042755272820004e-05,
"p50": 0.004822712745559209,
"p75": 0.05811090765473097,
"p90": 0.13649207990035975,
"p95": 0.1987869577393624,
"p99": 0.3652163739953438
"p1": 0.3683998584485473,
"p5": 0.1989762612035137,
"p10": 0.13678163876238772,
"p25": 0.05818004802054541,
"p50": 0.004794188738463603,
"p75": 1.099611491732316e-05,
"p90": 0.0,
"p95": 0.0,
"p99": 0.0
}
}
}
72 changes: 36 additions & 36 deletions data/de_2024_10.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,48 +10,48 @@
},
"metrics": {
"download_throughput_mbps": {
"p1": 0.22367850581560372,
"p5": 1.262769802856182,
"p10": 3.4166592054870026,
"p25": 13.817824595534129,
"p50": 45.24430302103892,
"p75": 100.56946051210859,
"p90": 248.78115747983244,
"p95": 377.8657642766346,
"p99": 741.7983223940372
"p1": 0.2225810589129801,
"p5": 1.2528819154630235,
"p10": 3.4209413897406646,
"p25": 13.810432910425352,
"p50": 45.27405796172478,
"p75": 100.56551588414163,
"p90": 248.79192117938703,
"p95": 377.46883271114353,
"p99": 741.7160739730066
},
"upload_throughput_mbps": {
"p1": 0.04798033204768874,
"p5": 0.07565187888251705,
"p10": 0.19852741925194242,
"p25": 3.5715003423978087,
"p50": 17.172955392453527,
"p75": 36.63458526768415,
"p90": 53.192909502396375,
"p95": 101.34444079000329,
"p99": 285.7324202068485
"p1": 0.04888864801257374,
"p5": 0.07565371155122488,
"p10": 0.20144847741476402,
"p25": 3.571516158290839,
"p50": 17.180642660658165,
"p75": 36.60604286113131,
"p90": 53.23036640523673,
"p95": 101.60189503128285,
"p99": 285.6942608280348
},
"latency_ms": {
"p1": 0.438,
"p5": 3.433,
"p10": 6.787,
"p25": 11.589,
"p50": 17.712,
"p75": 26.382,
"p90": 38.489,
"p95": 57.061,
"p99": 305.85
"p1": 304.0,
"p5": 57.103,
"p10": 38.461,
"p25": 26.383,
"p50": 17.711,
"p75": 11.597,
"p90": 6.791,
"p95": 3.482,
"p99": 0.438
},
"packet_loss": {
"p1": 0.0,
"p5": 0.0,
"p10": 0.0,
"p25": 0.0,
"p50": 0.00034573047467282084,
"p75": 0.016581558328885995,
"p90": 0.07073353719313655,
"p95": 0.11517449630011735,
"p99": 0.2521127443846117
"p1": 0.2521662550269682,
"p5": 0.11532512559748044,
"p10": 0.07065396520089343,
"p25": 0.016604288718428745,
"p50": 0.00034525252746869345,
"p75": 0.0,
"p90": 0.0,
"p95": 0.0,
"p99": 0.0
}
}
}
67 changes: 51 additions & 16 deletions data/query_downloads.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,33 @@
SELECT
client.Geo.CountryCode as country_code,
COUNT(*) as sample_count,

-- ============================================================================
-- PERCENTILE LABELING CONVENTION FOR IQB QUALITY ASSESSMENT
-- ============================================================================
--
-- For "higher is better" metrics (throughput):
-- - Raw p95 = "95% of users have ≤ X Mbit/s"
-- - Label: OFFSET(95) → download_p95 (standard statistical definition)
-- - Interpretation: top ~5% of users have > p95 throughput
--
-- For "lower is better" metrics (latency, packet loss):
-- - Raw p95 = "95% of users have ≤ X ms latency" (worst-case typical)
-- - We want p95 to represent best-case typical (to match throughput semantics)
-- - Solution: Invert labels - use raw p5 labeled as p95
-- - Label: OFFSET(5) → latency_p95 (inverted!)
-- - Interpretation: only the best ~5% of users achieve latency at or below the labeled p95
--
-- Result: Uniform comparison logic where p95 always means "typical best
-- performance" rather than "typical worst performance"
--
-- NOTE: This creates semantics where checking p95 thresholds asks
-- "Can the top ~5% of users perform this use case?" - empirical validation
-- against real data will determine if this interpretation is appropriate.
-- CAUTION: For inverted metrics the labeled percentiles are no longer
-- monotonically increasing (labeled p1 ≥ p5 ≥ ... ≥ p99); any downstream
-- consumer that assumes ascending percentile values must account for this.
-- ============================================================================

-- Download throughput (higher is better - NO INVERSION)
-- Standard percentile labels matching statistical definition
APPROX_QUANTILES(a.MeanThroughputMbps, 100)[OFFSET(1)] as download_p1,
APPROX_QUANTILES(a.MeanThroughputMbps, 100)[OFFSET(5)] as download_p5,
APPROX_QUANTILES(a.MeanThroughputMbps, 100)[OFFSET(10)] as download_p10,
Expand All @@ -10,24 +37,32 @@ SELECT
APPROX_QUANTILES(a.MeanThroughputMbps, 100)[OFFSET(90)] as download_p90,
APPROX_QUANTILES(a.MeanThroughputMbps, 100)[OFFSET(95)] as download_p95,
APPROX_QUANTILES(a.MeanThroughputMbps, 100)[OFFSET(99)] as download_p99,
APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(1)] as latency_p1,
APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(5)] as latency_p5,
APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(10)] as latency_p10,
APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(25)] as latency_p25,

-- Latency/MinRTT (lower is better - INVERTED LABELS!)
-- ⚠️ OFFSET(99) = worst latency = top 1% worst users → labeled as p1
-- ⚠️ OFFSET(5) = 5th percentile = best ~5% of users → labeled as p95
APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(99)] as latency_p1,
APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(95)] as latency_p5,
APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(90)] as latency_p10,
APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(75)] as latency_p25,
APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(50)] as latency_p50,
APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(75)] as latency_p75,
APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(90)] as latency_p90,
APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(95)] as latency_p95,
APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(99)] as latency_p99,
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(1)] as loss_p1,
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(5)] as loss_p5,
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(10)] as loss_p10,
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(25)] as loss_p25,
APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(25)] as latency_p75,
APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(10)] as latency_p90,
APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(5)] as latency_p95,
APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(1)] as latency_p99,

-- Packet Loss Rate (lower is better - INVERTED LABELS!)
-- ⚠️ OFFSET(99) = worst loss = top 1% worst users → labeled as p1
-- ⚠️ OFFSET(5) = 5th percentile = best ~5% of users → labeled as p95
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(99)] as loss_p1,
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(95)] as loss_p5,
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(90)] as loss_p10,
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(75)] as loss_p25,
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(50)] as loss_p50,
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(75)] as loss_p75,
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(90)] as loss_p90,
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(95)] as loss_p95,
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(99)] as loss_p99
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(25)] as loss_p75,
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(10)] as loss_p90,
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(5)] as loss_p95,
APPROX_QUANTILES(a.LossRate, 100)[OFFSET(1)] as loss_p99
FROM
`measurement-lab.ndt.unified_downloads`
WHERE
Expand Down
13 changes: 13 additions & 0 deletions data/query_uploads.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
SELECT
client.Geo.CountryCode as country_code,
COUNT(*) as sample_count,

-- ============================================================================
-- PERCENTILE LABELING CONVENTION FOR IQB QUALITY ASSESSMENT
-- ============================================================================
--
-- Upload throughput is "higher is better", so we use standard percentile
-- labels (no inversion).
--
-- See query_downloads.sql for detailed explanation and rationale.
-- ============================================================================

-- Upload throughput (higher is better - NO INVERSION)
-- Standard percentile labels matching statistical definition
APPROX_QUANTILES(a.MeanThroughputMbps, 100)[OFFSET(1)] as upload_p1,
APPROX_QUANTILES(a.MeanThroughputMbps, 100)[OFFSET(5)] as upload_p5,
APPROX_QUANTILES(a.MeanThroughputMbps, 100)[OFFSET(10)] as upload_p10,
Expand Down
72 changes: 36 additions & 36 deletions data/us_2024_10.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,48 +10,48 @@
},
"metrics": {
"download_throughput_mbps": {
"p1": 0.37354810526833476,
"p5": 2.7494108827310177,
"p10": 7.6575433038007406,
"p25": 29.94873577502137,
"p50": 96.36533017831101,
"p75": 268.1810327939917,
"p90": 474.1768162996085,
"p95": 625.4494125653449,
"p99": 893.2782851912168
"p1": 0.3812327371493097,
"p5": 2.7476349376135296,
"p10": 7.638638876969803,
"p25": 29.89884264277766,
"p50": 96.2643838797045,
"p75": 268.4085597033553,
"p90": 474.14118932768235,
"p95": 626.3132682965055,
"p99": 893.0568504937463
},
"upload_throughput_mbps": {
"p1": 0.06279911698366483,
"p5": 0.15105079102447938,
"p10": 1.0130561597157441,
"p25": 8.030055616329323,
"p50": 20.95814566696693,
"p75": 65.73945359925672,
"p90": 223.9767416770114,
"p95": 370.4336035390081,
"p99": 813.7319533731953
"p1": 0.06257042412674083,
"p5": 0.15144845324010167,
"p10": 0.9992760254839029,
"p25": 8.03213984894271,
"p50": 20.98046809727222,
"p75": 65.669501568909,
"p90": 224.29692902729298,
"p95": 368.91185081459395,
"p99": 819.839280930373
},
"latency_ms": {
"p1": 0.16,
"p5": 0.808,
"p10": 2.886,
"p25": 7.778,
"p50": 16.124,
"p75": 30.0,
"p90": 51.303,
"p95": 80.55,
"p99": 251.545
"p1": 255.993,
"p5": 80.759,
"p10": 51.2,
"p25": 30.0,
"p50": 16.119,
"p75": 7.783,
"p90": 2.895,
"p95": 0.804,
"p99": 0.161
},
"packet_loss": {
"p1": 0.0,
"p5": 0.0,
"p10": 0.0,
"p25": 0.0,
"p50": 0.000516724336793541,
"p75": 0.019090240380880846,
"p90": 0.07332944466732425,
"p95": 0.12018590164702943,
"p99": 0.253111989432024
"p1": 0.2517569864889713,
"p5": 0.11998957375627733,
"p10": 0.07340665854872248,
"p25": 0.019064946948168876,
"p50": 0.0005185937475477769,
"p75": 0.0,
"p90": 0.0,
"p95": 0.0,
"p99": 0.0
}
}
}
24 changes: 24 additions & 0 deletions library/src/iqb/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,30 @@ def get_data(
Raises:
FileNotFoundError: If requested data is not available in cache.
ValueError: If requested percentile is not available in cached data.

⚠️ PERCENTILE INTERPRETATION (CRITICAL!)
=========================================

For "higher is better" metrics (throughput):
- Raw p95 = "95% of users have ≤ 625 Mbit/s speed"
- Directly usable: download_p95 ≥ threshold?
- No inversion needed (standard statistical definition)

For "lower is better" metrics (latency, packet loss):
- Raw p95 = "95% of users have ≤ 80ms latency" (worst-case typical)
- We want p95 to represent best-case typical (to match throughput)
- Solution: Use p5 raw labeled as p95
- Mathematical inversion: p(X)_labeled = p(100-X)_raw
- Example: OFFSET(5) raw → labeled as "latency_p95" in JSON

This inversion happens in BigQuery (see data/query_*.sql),
so this cache code treats all percentiles uniformly.

When you request percentile=95, you get the 95th percentile value
that can be compared uniformly against thresholds.

NOTE: This creates semantics where p95 represents "typical best
performance" - empirical validation will determine whether this
interpretation is appropriate.
"""
# Design Note
# -----------
Expand Down
Loading
Loading