Skip to content

Commit f204191

Browse files
Merge pull request #71 from hubverse-org/mc/hub-data
refactor to use the hubdata package for hub access
2 parents 311f8fc + 07470eb commit f204191

File tree

67 files changed

+694
-1016
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

67 files changed

+694
-1016
lines changed

demo/index.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
</div>
2626

2727
<script type="module">
28-
import App from 'https://cdn.jsdelivr.net/gh/reichlab/predtimechart@3.0.0/dist/predtimechart.bundle.js';
28+
import App from 'https://cdn.jsdelivr.net/gh/reichlab/predtimechart@3.1.1/dist/predtimechart.bundle.js';
2929
document.predtimechart = App; // for debugging
3030

3131
function replace_chars(the_string) {

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ dependencies = [
1717
"pyyaml",
1818
"structlog",
1919
"pyarrow",
20-
"jsonschema"
20+
"jsonschema",
21+
"hubdata>=0.1.3",
2122
]
2223

2324
[project.optional-dependencies]

requirements/requirements.txt

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,25 @@ attrs==24.2.0
1010
# referencing
1111
build==1.2.1
1212
# via pip-tools
13-
click==8.1.7
13+
click==8.2.1
1414
# via
1515
# hub-dashboard-predtimechart (pyproject.toml)
16+
# hubdata
1617
# pip-tools
18+
hubdata==0.1.3
19+
# via hub-dashboard-predtimechart (pyproject.toml)
1720
iniconfig==2.0.0
1821
# via pytest
1922
jsonschema==4.23.0
2023
# via hub-dashboard-predtimechart (pyproject.toml)
2124
jsonschema-specifications==2023.12.1
2225
# via jsonschema
26+
markdown-it-py==3.0.0
27+
# via rich
28+
mdurl==0.1.2
29+
# via markdown-it-py
2330
numpy==2.1.0
24-
# via
25-
# pandas
26-
# pyarrow
31+
# via pandas
2732
packaging==24.1
2833
# via
2934
# build
@@ -36,8 +41,12 @@ pluggy==1.5.0
3641
# via pytest
3742
polars==1.11.0
3843
# via hub-dashboard-predtimechart (pyproject.toml)
39-
pyarrow==17.0.0
40-
# via hub-dashboard-predtimechart (pyproject.toml)
44+
pyarrow==21.0.0
45+
# via
46+
# hub-dashboard-predtimechart (pyproject.toml)
47+
# hubdata
48+
pygments==2.19.2
49+
# via rich
4150
pyproject-hooks==1.1.0
4251
# via
4352
# build
@@ -54,14 +63,18 @@ referencing==0.35.1
5463
# via
5564
# jsonschema
5665
# jsonschema-specifications
66+
rich==14.1.0
67+
# via hubdata
5768
rpds-py==0.20.0
5869
# via
5970
# jsonschema
6071
# referencing
6172
six==1.16.0
6273
# via python-dateutil
6374
structlog==24.4.0
64-
# via hub-dashboard-predtimechart (pyproject.toml)
75+
# via
76+
# hub-dashboard-predtimechart (pyproject.toml)
77+
# hubdata
6578
tzdata==2024.1
6679
# via pandas
6780
wheel==0.44.0

src/hub_predtimechart/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "2.2.0"
1+
__version__ = "2.2.1"

src/hub_predtimechart/app/generate_target_json_files.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -133,11 +133,11 @@ def ptc_target_data(model_task: ModelTask, target_data_df: pl.DataFrame, task_id
133133
else:
134134
target_data_df = target_data_df.filter(pl.col('as_of') == max_as_of.isoformat())
135135
else:
136-
# the file is one that is assumed to be updated weekly and so we can
137-
# assume that the effective as_of date for this file is the same as
138-
# max_available_ref_date (the newest date)
139-
if max_available_ref_date is not None and reference_date != max_available_ref_date:
140-
return None
136+
# the file is one that is assumed to be updated weekly and so we can
137+
# assume that the effective as_of date for this file is the same as
138+
# max_available_ref_date (the newest date)
139+
if max_available_ref_date is not None and reference_date != max_available_ref_date:
140+
return None
141141

142142
# until all hubs implement our new time-series target data standard, we condition on
143143
# hub_config.target_data_file_name, which acts as a flag indicating whether the hub implements the new standard or

src/hub_predtimechart/hub_config.py

Lines changed: 0 additions & 29 deletions
This file was deleted.

src/hub_predtimechart/hub_config_ptc.py

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,25 @@
77
import pandas as pd
88
import polars as pl
99
import yaml
10-
from jsonschema import ValidationError, validate, FormatChecker
10+
from hubdata import HubConnection
11+
from jsonschema import FormatChecker, ValidationError, validate
1112

12-
from hub_predtimechart.hub_config import HubConfig
1313
from hub_predtimechart.ptc_schema import ptc_config_schema
1414

1515

16-
class HubConfigPtc(HubConfig):
16+
class HubConfigPtc(HubConnection):
1717
"""
18-
A HubConfig subclass that adds various visualization-related variables from a hub.
18+
A `hubdata.HubConnection` subclass that adds various visualization-related variables from a hub. Note that this
19+
class only works with local filesystems, and therefore only accepts a Path for `hub_path`.
1920
2021
Instance variables:
22+
23+
Via HubConnection:
24+
- hub_path: str or Path pointing to a hub's root directory as passed to `hubdata.connect_hub()`
25+
- tasks: the hub's `tasks.json` contents as a dict
26+
- model_metadata_schema: the hub's `model-metadata-schema.json` contents as a dict
27+
28+
Via this class:
2129
- rounds_idx: as loaded from `ptc_config_file`
2230
- reference_date_col_name: ""
2331
- horizon_col_name: ""
@@ -36,13 +44,16 @@ class HubConfigPtc(HubConfig):
3644
"""
3745

3846

39-
def __init__(self, hub_dir: Path, ptc_config_file: Path):
47+
def __init__(self, hub_path: Path, ptc_config_file: Path):
4048
"""
41-
:param hub_dir: as defined in HubConfig.__init__()
49+
:param hub_path: Path pointing to a hub's root directory as passed to `hubdata.connect_hub()`
4250
:param ptc_config_file: location of `predtimechart-config.yml` (or other named) file that matches ptc_schema.py.
43-
this file specifies how to process `hub_dir` to get predtimechart output
51+
this file specifies how to process `hub_path` to get predtimechart output
4452
"""
45-
super().__init__(hub_dir)
53+
if not isinstance(hub_path, Path):
54+
raise TypeError(f"hub_path was not a Path. hub_path={hub_path!r}, type={type(hub_path).__name__}")
55+
56+
super().__init__(hub_path)
4657

4758
if not ptc_config_file.exists():
4859
raise RuntimeError(f"predtimechart config file not found: {ptc_config_file}")
@@ -67,8 +78,8 @@ def __init__(self, hub_dir: Path, ptc_config_file: Path):
6778

6879
# set model_id_to_metadata
6980
self.model_id_to_metadata: dict[str, dict] = {}
70-
for model_metadata_file in (list((self.hub_dir / 'model-metadata').glob('*.yml')) +
71-
list((self.hub_dir / 'model-metadata').glob('*.yaml'))):
81+
for model_metadata_file in (list((self.hub_path / 'model-metadata').glob('*.yml')) +
82+
list((self.hub_path / 'model-metadata').glob('*.yaml'))):
7283
with open(model_metadata_file) as fp:
7384
model_metadata = yaml.safe_load(fp)
7485
model_id = f"{model_metadata['team_abbr']}-{model_metadata['model_abbr']}"
@@ -90,8 +101,8 @@ def model_output_file_for_ref_date(self, model_id: str, reference_date: str) ->
90101
Returns a Path to the model output file corresponding to `model_id` and `reference_date`. Returns None if none
91102
found.
92103
"""
93-
poss_output_files = [self.hub_dir / 'model-output' / model_id / f"{reference_date}-{model_id}.csv",
94-
self.hub_dir / 'model-output' / model_id / f"{reference_date}-{model_id}.parquet"]
104+
poss_output_files = [self.hub_path / 'model-output' / model_id / f"{reference_date}-{model_id}.csv",
105+
self.hub_path / 'model-output' / model_id / f"{reference_date}-{model_id}.parquet"]
95106
for poss_output_file in poss_output_files:
96107
if poss_output_file.exists():
97108
return poss_output_file
@@ -104,7 +115,7 @@ def get_target_data_df(self) -> pl.DataFrame:
104115
Loads the target data csv file from the hub repo. For now, the file path for target data is hard-coded to 'target-data'.
105116
Raises FileNotFoundError if target data file does not exist.
106117
"""
107-
target_data_file_path = self.hub_dir / 'target-data' / self.get_target_data_file_name()
118+
target_data_file_path = self.hub_path / 'target-data' / self.get_target_data_file_name()
108119
try:
109120
# the override schema handles the 'US' location (the only location that doesn't parse as Int64)
110121
# todo hard-coded column names
@@ -165,8 +176,10 @@ def _validate_hub_ptc_compatibility(hub_config_ptc: HubConfigPtc):
165176
if not hub_config_ptc.model_tasks:
166177
raise ValidationError(f"no applicable model_task entries were found")
167178

168-
# validate: model metadata must contain a boolean `designated_model` field
169-
if 'designated_model' not in hub_config_ptc.model_metadata_schema['required']:
179+
# validate: model metadata must be present and must contain a boolean `designated_model` field
180+
if hub_config_ptc.model_metadata_schema is None:
181+
raise ValidationError(f"model metadata schema not found")
182+
elif 'designated_model' not in hub_config_ptc.model_metadata_schema['required']:
170183
raise ValidationError(f"'designated_model' not found in model metadata schema's 'required' section")
171184

172185
# validate: all model_task entries have the same task_ids. frozenset lets us make a set of sets

tests/hub_predtimechart/test_hub_config.py

Lines changed: 0 additions & 25 deletions
This file was deleted.

0 commit comments

Comments
 (0)