Skip to content

Commit cba4edc

Browse files
committed
feat: add IQBCache component
This commit implements the IQBCache component for fetching IQB measurement data from local cache files. Key features: (1) Git-like .iqb/ directory convention (customizable); (2) support for US, DE, BR data (October 2024); (3) percentile extraction (1-99) with helpful error messages; (4) good test coverage (13 tests). We adopt a convention where .iqb/ is the default cache directory in the current working directory (like .git/), but this can be overridden by passing a custom path. While there, we also make these additional quality improvements: (1) add .editorconfig for cross-editor consistency; (2) add explicit ruff formatting rules (spaces, double quotes, LF); (3) add ruff format/lint/pyright checks to the CI pipeline.
1 parent 73508fd commit cba4edc

File tree

8 files changed

+404
-14
lines changed

8 files changed

+404
-14
lines changed

.editorconfig

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# EditorConfig for m-lab/iqb
2+
# https://editorconfig.org
3+
4+
root = true
5+
6+
# Defaults for all files
7+
[*]
8+
charset = utf-8
9+
end_of_line = lf
10+
insert_final_newline = true
11+
trim_trailing_whitespace = true
12+
13+
# Python source files
14+
[*.py]
15+
indent_style = space
16+
indent_size = 4
17+
max_line_length = 100
18+
19+
# YAML files
20+
[*.{yml,yaml}]
21+
indent_style = space
22+
indent_size = 2
23+
24+
# JSON files
25+
[*.json]
26+
indent_style = space
27+
indent_size = 2
28+
29+
# Markdown files
30+
[*.md]
31+
indent_style = space
32+
indent_size = 2
33+
34+
# Makefiles require tabs
35+
[Makefile]
36+
indent_style = tab
37+
38+
# Shell scripts
39+
[*.sh]
40+
indent_style = space
41+
indent_size = 2

.github/workflows/ci.yml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,33 @@ jobs:
2525
- name: Sync workspace dependencies
2626
run: uv sync --dev
2727

28+
- name: Check code formatting with ruff
29+
working-directory: library
30+
run: |
31+
uv run ruff format --check || {
32+
echo "❌ Code formatting check failed!"
33+
echo "To fix locally, run: cd library && uv run ruff format"
34+
exit 1
35+
}
36+
37+
- name: Lint code with ruff
38+
working-directory: library
39+
run: |
40+
uv run ruff check || {
41+
echo "❌ Linting check failed!"
42+
echo "To fix locally, run: cd library && uv run ruff check --fix"
43+
exit 1
44+
}
45+
46+
- name: Type check with pyright
47+
working-directory: library
48+
run: |
49+
uv run pyright || {
50+
echo "❌ Type checking failed!"
51+
echo "To see errors locally, run: cd library && uv run pyright"
52+
exit 1
53+
}
54+
2855
- name: Run pytest
2956
working-directory: library
3057
run: uv run pytest

library/pyproject.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,11 @@ addopts = [
4646
line-length = 100
4747
target-version = "py313"
4848

49+
[tool.ruff.format]
50+
indent-style = "space"
51+
quote-style = "double"
52+
line-ending = "lf"
53+
4954
[tool.ruff.lint]
5055
select = [
5156
"E", # pycodestyle errors

library/src/iqb/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,12 @@
44
network measurement data, weight matrices, and quality thresholds.
55
"""
66

7+
from .cache import IQBCache
78
from .calculator import IQBCalculator
89
from .config import IQB_CONFIG
910

1011
# Backward compatibility alias
1112
IQB = IQBCalculator
1213

13-
__all__ = ["IQB", "IQBCalculator", "IQB_CONFIG"]
14+
__all__ = ["IQB", "IQBCalculator", "IQBCache", "IQB_CONFIG"]
1415
__version__ = "0.1.0"

library/src/iqb/cache.py

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
"""Module for fetching IQB measurement data from cache.
2+
3+
The IQBCache component manages local caching of IQB measurement data, following
4+
a Git-like convention for storing local state.
5+
6+
Cache Directory Convention
7+
---------------------------
8+
By default, IQBCache looks for a `.iqb/` directory in the current working
9+
directory, similar to how Git uses `.git/` for repository state. This provides:
10+
11+
- Clear ownership (`.iqb/` contains IQB-specific data)
12+
- Per-project isolation (each project has its own cache)
13+
- Conventional pattern (like `.cache/`, `.config/`, etc.)
14+
15+
Example usage:
16+
17+
# Uses ./.iqb/ in current directory
18+
cache = IQBCache()
19+
20+
# Or specify custom location
21+
cache = IQBCache(cache_dir="/shared/iqb-cache")
22+
"""
23+
24+
import json
25+
from datetime import datetime
26+
from pathlib import Path
27+
28+
29+
class IQBCache:
30+
"""Component for fetching IQB measurement data from cache."""
31+
32+
def __init__(self, cache_dir: str | Path | None = None):
33+
"""
34+
Initialize cache with data directory path.
35+
36+
Parameters:
37+
cache_dir: Path to directory containing cached data files.
38+
If None, defaults to ./.iqb/ in current working directory.
39+
"""
40+
if cache_dir is None:
41+
# Git-like convention: local state in .iqb/ directory
42+
self.cache_dir = Path.cwd() / ".iqb"
43+
else:
44+
self.cache_dir = Path(cache_dir)
45+
46+
def get_data(
47+
self,
48+
country: str,
49+
start_date: datetime,
50+
end_date: datetime | None = None,
51+
percentile: int = 95,
52+
) -> dict:
53+
"""
54+
Fetch measurement data for IQB calculation.
55+
56+
Args:
57+
country: ISO 2-letter country code ("US", "DE", "BR").
58+
start_date: Start of date range (inclusive).
59+
end_date: End of date range (exclusive). If None, defaults to start_date + 1 month.
60+
percentile: Which percentile to extract (1-99).
61+
62+
Returns:
63+
dict with keys for IQBCalculator:
64+
{
65+
"download_throughput_mbps": float,
66+
"upload_throughput_mbps": float,
67+
"latency_ms": float,
68+
"packet_loss": float,
69+
}
70+
71+
Raises:
72+
FileNotFoundError: If requested data is not available in cache.
73+
ValueError: If requested percentile is not available in cached data.
74+
"""
75+
# Hard-coded data we have: October 2024 for US, DE, BR
76+
country_lower = country.lower()
77+
78+
# Check if we have this exact data
79+
if country_lower == "us" and start_date == datetime(2024, 10, 1) and end_date is None:
80+
filename = "us_2024_10.json"
81+
elif country_lower == "de" and start_date == datetime(2024, 10, 1) and end_date is None:
82+
filename = "de_2024_10.json"
83+
elif country_lower == "br" and start_date == datetime(2024, 10, 1) and end_date is None:
84+
filename = "br_2024_10.json"
85+
else:
86+
raise FileNotFoundError(
87+
f"No cached data for country={country}, "
88+
f"start_date={start_date}, end_date={end_date}. "
89+
f"Currently only have: US/DE/BR for October 2024."
90+
)
91+
92+
# Load from file
93+
filepath = self.cache_dir / filename
94+
if not filepath.exists():
95+
raise FileNotFoundError(f"Cache file missing: {filepath}")
96+
97+
with open(filepath) as f:
98+
data = json.load(f)
99+
100+
# Extract the requested percentile
101+
return self._extract_percentile(data, percentile)
102+
103+
def _extract_percentile(self, data: dict, percentile: int) -> dict:
104+
"""
105+
Extract specific percentile from JSON data.
106+
107+
Converts from JSON format to IQBCalculator format:
108+
- Input: {"metrics": {"download_throughput_mbps": {"p95": 123, ...}, ...}}
109+
- Output: {"download_throughput_mbps": 123, ...}
110+
111+
Args:
112+
data: Full JSON data structure from cache file.
113+
percentile: Which percentile to extract.
114+
115+
Returns:
116+
dict with metric values for the specified percentile.
117+
118+
Raises:
119+
ValueError: If requested percentile is not available in the cached data.
120+
"""
121+
metrics = data["metrics"]
122+
p_key = f"p{percentile}"
123+
124+
try:
125+
return {
126+
"download_throughput_mbps": metrics["download_throughput_mbps"][p_key],
127+
"upload_throughput_mbps": metrics["upload_throughput_mbps"][p_key],
128+
"latency_ms": metrics["latency_ms"][p_key],
129+
"packet_loss": metrics["packet_loss"][p_key],
130+
}
131+
except KeyError as err:
132+
# Determine which percentiles ARE available
133+
available = sorted(
134+
[int(k[1:]) for k in metrics["download_throughput_mbps"] if k.startswith("p")]
135+
)
136+
raise ValueError(
137+
f"Percentile {percentile} not available in cached data. "
138+
f"Available percentiles: {available}"
139+
) from err

library/src/iqb/calculator.py

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,7 @@ def print_config(self):
5656
for uc in IQB_CONFIG["use cases"]:
5757
for nr in IQB_CONFIG["use cases"][uc]["network requirements"]:
5858
nr_w = IQB_CONFIG["use cases"][uc]["network requirements"][nr]["w"]
59-
nr_th = IQB_CONFIG["use cases"][uc]["network requirements"][nr][
60-
"threshold min"
61-
]
59+
nr_th = IQB_CONFIG["use cases"][uc]["network requirements"][nr]["threshold min"]
6260
print(f"\t{uc:20} \t{nr:20} \t{nr_w} \t{nr_th}")
6361
print()
6462

@@ -113,19 +111,15 @@ def calculate_iqb_score(self, data=None, print_details=False):
113111
nr_weights = []
114112
for nr in self.config["use cases"][uc]["network requirements"]:
115113
nr_w = self.config["use cases"][uc]["network requirements"][nr]["w"]
116-
nr_th = self.config["use cases"][uc]["network requirements"][nr][
117-
"threshold min"
118-
]
114+
nr_th = self.config["use cases"][uc]["network requirements"][nr]["threshold min"]
119115

120116
# TODO: TEMP method for calculating binary requirement scores. To be
121117
# updated with weighted average of scores per dataset.
122118
ds_s = []
123-
for ds in self.config["use cases"][uc]["network requirements"][nr][
124-
"datasets"
125-
]:
126-
ds_w = self.config["use cases"][uc]["network requirements"][nr][
127-
"datasets"
128-
][ds]["w"]
119+
for ds in self.config["use cases"][uc]["network requirements"][nr]["datasets"]:
120+
ds_w = self.config["use cases"][uc]["network requirements"][nr]["datasets"][ds][
121+
"w"
122+
]
129123
if ds_w > 0:
130124
# binary requirement score (dataset, network requirement)
131125
brs = self.calculate_binary_requirement_score(

0 commit comments

Comments
 (0)