Skip to content

Commit ebd736f

Browse files
Merge pull request #148 from PolicyEngine/nikhilwoodruff/issue147
Add `versions` argument to `Simulation`
2 parents f66cdfe + 60b20cf commit ebd736f

19 files changed

+354
-193
lines changed

.github/workflows/any_changes.yaml

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ on:
88

99
jobs:
1010
docs:
11+
permissions:
12+
contents: "read"
13+
id-token: "write"
1114
name: Test documentation builds
1215
runs-on: ubuntu-latest
1316
steps:
@@ -20,14 +23,16 @@ jobs:
2023
uses: actions/setup-python@v2
2124
with:
2225
python-version: '3.11'
26+
- uses: "google-github-actions/auth@v2"
27+
with:
28+
workload_identity_provider: "projects/322898545428/locations/global/workloadIdentityPools/policyengine-research-id-pool/providers/prod-github-provider"
29+
service_account: "policyengine-research@policyengine-research.iam.gserviceaccount.com"
2330

2431
- name: Install package
2532
run: uv pip install .[dev] --system
2633

2734
- name: Test documentation builds
2835
run: make documentation
29-
env:
30-
HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
3136

3237
- name: Check documentation build
3338
run: |

.github/workflows/code_changes.yaml

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ jobs:
2121
args: ". -l 79 --check"
2222
Test:
2323
runs-on: ubuntu-latest
24+
permissions:
25+
contents: "read"
26+
id-token: "write"
2427
steps:
2528
- name: Checkout repo
2629
uses: actions/checkout@v2
@@ -31,11 +34,13 @@ jobs:
3134
uses: actions/setup-python@v2
3235
with:
3336
python-version: '3.11'
37+
- uses: "google-github-actions/auth@v2"
38+
with:
39+
workload_identity_provider: "projects/322898545428/locations/global/workloadIdentityPools/policyengine-research-id-pool/providers/prod-github-provider"
40+
service_account: "policyengine-research@policyengine-research.iam.gserviceaccount.com"
3441

3542
- name: Install package
3643
run: uv pip install .[dev] --system
3744

3845
- name: Run tests
39-
run: make test
40-
env:
41-
HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
46+
run: make test

.github/workflows/publish_documentation.yaml

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ on:
77

88
jobs:
99
Publish:
10+
permissions:
11+
contents: "read"
12+
id-token: "write"
1013
runs-on: ubuntu-latest
1114
steps:
1215
- name: Checkout repo
@@ -15,15 +18,17 @@ jobs:
1518
uses: actions/setup-python@v5
1619
with:
1720
python-version: 3.12
21+
- uses: "google-github-actions/auth@v2"
22+
with:
23+
workload_identity_provider: "projects/322898545428/locations/global/workloadIdentityPools/policyengine-research-id-pool/providers/prod-github-provider"
24+
service_account: "policyengine-research@policyengine-research.iam.gserviceaccount.com"
1825
- name: Publish a git tag
1926
run: ".github/publish-git-tag.sh || true"
2027
- name: Install package
2128
run: make install
2229

2330
- name: Build documentation
2431
run: make documentation
25-
env:
26-
HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
2732

2833
- name: Deploy documentation
2934
uses: JamesIves/github-pages-deploy-action@releases/v3

.github/workflows/publish_package.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,6 @@ jobs:
3636

3737
- name: Test documentation builds
3838
run: make documentation
39-
env:
40-
HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
4139

4240
- name: Deploy documentation
4341
uses: JamesIves/github-pages-deploy-action@releases/v3

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,3 +162,6 @@ cython_debug/
162162
*.ipynb
163163

164164
!docs/**/*.ipynb
165+
166+
**/*.h5
167+
**/*.csv

changelog_entry.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
- bump: minor
2+
changes:
3+
added:
4+
- Error handling for data and package version mismatches.

docs/concepts/simulation.ipynb

Lines changed: 31 additions & 5 deletions
Large diffs are not rendered by default.

policyengine/constants.py

Lines changed: 14 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2,40 +2,26 @@
22

33
from policyengine_core.data import Dataset
44
from policyengine.utils.data_download import download
5+
from typing import Tuple, Optional
56

6-
# Datasets
7-
ENHANCED_FRS = "hf://policyengine/policyengine-uk-data/enhanced_frs_2022_23.h5"
8-
FRS = "hf://policyengine/policyengine-uk-data/frs_2022_23.h5"
9-
ENHANCED_CPS = "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"
10-
CPS = "hf://policyengine/policyengine-us-data/cps_2023.h5"
11-
POOLED_CPS = "hf://policyengine/policyengine-us-data/pooled_3_year_cps_2023.h5"
7+
EFRS_2022 = "gcs://policyengine-uk-data-private/enhanced_frs_2022_23.h5"
8+
FRS_2022 = "gcs://policyengine-uk-data-private/frs_2022_23.h5"
9+
CPS_2023_POOLED = "gcs://policyengine-us-data/pooled_3_year_cps_2023.h5"
10+
CPS_2023 = "gcs://policyengine-us-data/cps_2023.h5"
11+
ECPS_2024 = "gcs://policyengine-us-data/ecps_2024.h5"
1212

1313

14-
def get_default_dataset(country: str, region: str):
14+
def get_default_dataset(
15+
country: str, region: str, version: Optional[str] = None
16+
) -> str:
1517
if country == "uk":
16-
data_file = download(
17-
filepath="enhanced_frs_2022_23.h5",
18-
huggingface_repo="policyengine-uk-data",
19-
gcs_bucket="policyengine-uk-data-private",
20-
)
21-
time_period = None
18+
return EFRS_2022
2219
elif country == "us":
2320
if region is not None and region != "us":
24-
data_file = download(
25-
filepath="pooled_3_year_cps_2023.h5",
26-
huggingface_repo="policyengine-us-data",
27-
gcs_bucket="policyengine-us-data",
28-
)
29-
time_period = 2023
21+
return CPS_2023_POOLED
3022
else:
31-
data_file = download(
32-
filepath="cps_2023.h5",
33-
huggingface_repo="policyengine-us-data",
34-
gcs_bucket="policyengine-us-data",
35-
)
36-
time_period = 2023
23+
return CPS_2023
3724

38-
return Dataset.from_file(
39-
file_path=data_file,
40-
time_period=time_period,
25+
raise ValueError(
26+
f"Unable to select a default dataset for country {country} and region {region}."
4127
)

policyengine/outputs/macro/comparison/calculate_economy_comparison.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,7 @@
1010
from policyengine.outputs.macro.single.calculate_single_economy import (
1111
SingleEconomy,
1212
)
13-
from policyengine.utils.packages import get_country_package_version
14-
from typing import List, Dict
13+
from typing import List, Dict, Optional
1514

1615

1716
class BudgetaryImpact(BaseModel):
@@ -711,7 +710,6 @@ def uk_constituency_breakdown(
711710
reform_hnet = reform.household_net_income
712711

713712
constituency_weights_path = download(
714-
huggingface_repo="policyengine-uk-data",
715713
gcs_bucket="policyengine-uk-data-private",
716714
filepath="parliamentary_constituency_weights.h5",
717715
)
@@ -721,7 +719,6 @@ def uk_constituency_breakdown(
721719
] # {2025: array(650, 100180) where cell i, j is the weight of household record j in constituency i}
722720

723721
constituency_names_path = download(
724-
huggingface_repo="policyengine-uk-data",
725722
gcs_bucket="policyengine-uk-data-private",
726723
filepath="constituencies_2024.csv",
727724
)
@@ -786,7 +783,10 @@ class CliffImpact(BaseModel):
786783

787784

788785
class EconomyComparison(BaseModel):
789-
country_package_version: str
786+
model_version: Optional[str] = (
787+
None # Optional while some datasets have no tagged version.
788+
)
789+
data_version: Optional[str] = None
790790
budget: BudgetaryImpact
791791
detailed_budget: DetailedBudgetaryImpact
792792
decile: DecileImpact
@@ -849,7 +849,8 @@ def calculate_economy_comparison(
849849
cliff_impact = None
850850

851851
return EconomyComparison(
852-
country_package_version=get_country_package_version(country_id),
852+
model_version=simulation.model_version,
853+
data_version=simulation.data_version,
853854
budget=budgetary_impact_data,
854855
detailed_budget=detailed_budgetary_impact_data,
855856
decile=decile_impact_data,

policyengine/simulation.py

Lines changed: 58 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,11 @@
1818
Simulation as UKSimulation,
1919
Microsimulation as UKMicrosimulation,
2020
)
21+
from importlib import metadata
2122
import h5py
2223
from pathlib import Path
2324
import pandas as pd
24-
from typing import Type
25+
from typing import Type, Optional
2526
from functools import wraps, partial
2627
from typing import Dict, Any, Callable
2728
import importlib
@@ -34,8 +35,8 @@
3435
) # Needs stricter typing. Any==policyengine_core.data.Dataset, but pydantic refuses for some reason.
3536
TimePeriodType = int
3637
ReformType = ParametricReform | Type[StructuralReform] | None
37-
RegionType = str | None
38-
SubsampleType = int | None
38+
RegionType = Optional[str]
39+
SubsampleType = Optional[int]
3940

4041

4142
class SimulationOptions(BaseModel):
@@ -54,14 +55,22 @@ class SimulationOptions(BaseModel):
5455
None,
5556
description="How many, if a subsample, households to randomly simulate.",
5657
)
57-
title: str | None = Field(
58+
title: Optional[str] = Field(
5859
"[Analysis title]",
5960
description="The title of the analysis (for charts). If not provided, a default title will be generated.",
6061
)
61-
include_cliffs: bool | None = Field(
62+
include_cliffs: Optional[bool] = Field(
6263
False,
6364
description="Whether to include tax-benefit cliffs in the simulation analyses. If True, cliffs will be included.",
6465
)
66+
model_version: Optional[str] = Field(
67+
None,
68+
description="The version of the country model used in the simulation. If not provided, the current package version will be used. If provided, this package will throw an error if the package version does not match. Use this as an extra safety check.",
69+
)
70+
data_version: Optional[str] = Field(
71+
None,
72+
description="The version of the data used in the simulation. If not provided, the current data version will be used. If provided, this package will throw an error if the data version does not match. Use this as an extra safety check.",
73+
)
6574

6675

6776
class Simulation:
@@ -73,12 +82,16 @@ class Simulation:
7382
"""The baseline tax-benefit simulation."""
7483
reform_simulation: CountrySimulation | None = None
7584
"""The reform tax-benefit simulation."""
85+
data_version: Optional[str] = None
86+
"""The version of the data used in the simulation."""
87+
model_version: Optional[str] = None
7688

7789
def __init__(self, **options: SimulationOptions):
7890
self.options = SimulationOptions(**options)
79-
91+
self.check_model_version()
8092
self._set_data()
8193
self._initialise_simulations()
94+
self.check_data_version()
8295
self._add_output_functions()
8396

8497
def _add_output_functions(self):
@@ -119,29 +132,23 @@ def _set_data(self):
119132
region=self.options.region,
120133
)
121134

122-
elif isinstance(self.options.data, str):
135+
if isinstance(self.options.data, str):
123136
filename = self.options.data
124-
if "://" in self.options.data:
125-
bucket = None
126-
hf_repo = None
127-
hf_org = None
128-
if "gs://" in self.options.data:
129-
bucket, filename = self.options.data.split("://")[
130-
-1
131-
].split("/")
132-
hf_org = "policyengine"
133-
elif "hf://" in self.options.data:
134-
hf_org, hf_repo, filename = self.options.data.split("://")[
135-
-1
136-
].split("/", 2)
137+
if self.options.data[:6] == "gcs://":
138+
bucket, filename = self.options.data.split("://")[-1].split(
139+
"/"
140+
)
141+
version = self.options.data_version
137142

138143
file_path = download(
139144
filepath=filename,
140-
huggingface_org=hf_org,
141-
huggingface_repo=hf_repo,
142145
gcs_bucket=bucket,
146+
version=version,
143147
)
144148
filename = str(Path(file_path))
149+
else:
150+
# If it's a local file, we can't infer the version.
151+
version = None
145152
if "cps_2023" in filename:
146153
time_period = 2023
147154
else:
@@ -260,7 +267,6 @@ def _apply_region_to_simulation(
260267
elif "constituency/" in region:
261268
constituency = region.split("/")[1]
262269
constituency_names_file_path = download(
263-
huggingface_repo="policyengine-uk-data",
264270
gcs_bucket="policyengine-uk-data-private",
265271
filepath="constituencies_2024.csv",
266272
)
@@ -281,7 +287,6 @@ def _apply_region_to_simulation(
281287
f"Constituency {constituency} not found. See {constituency_names_file_path} for the list of available constituencies."
282288
)
283289
weights_file_path = download(
284-
huggingface_repo="policyengine-uk-data",
285290
gcs_bucket="policyengine-uk-data-private",
286291
filepath="parliamentary_constituency_weights.h5",
287292
)
@@ -297,7 +302,6 @@ def _apply_region_to_simulation(
297302
elif "local_authority/" in region:
298303
la = region.split("/")[1]
299304
la_names_file_path = download(
300-
huggingface_repo="policyengine-uk-data",
301305
gcs_bucket="policyengine-uk-data-private",
302306
filepath="local_authorities_2021.csv",
303307
)
@@ -312,7 +316,6 @@ def _apply_region_to_simulation(
312316
f"Local authority {la} not found. See {la_names_file_path} for the list of available local authorities."
313317
)
314318
weights_file_path = download(
315-
huggingface_repo="policyengine-uk-data",
316319
gcs_bucket="policyengine-uk-data-private",
317320
filepath="local_authority_weights.h5",
318321
)
@@ -327,3 +330,32 @@ def _apply_region_to_simulation(
327330
)
328331

329332
return simulation
333+
334+
def check_model_version(self) -> None:
335+
"""
336+
Check the package versions of the simulation against the current package versions.
337+
"""
338+
if self.options.model_version is not None:
339+
target_version = self.options.model_version
340+
package = f"policyengine-{self.options.country}"
341+
try:
342+
installed_version = metadata.version(package)
343+
self.model_version = installed_version
344+
except metadata.PackageNotFoundError:
345+
raise ValueError(
346+
f"Package {package} not found. Try running `pip install {package}`."
347+
)
348+
if installed_version != target_version:
349+
raise ValueError(
350+
f"Package {package} version {installed_version} does not match expected version {target_version}. Try running `pip install {package}=={target_version}`."
351+
)
352+
353+
def check_data_version(self) -> None:
354+
"""
355+
Check the data versions of the simulation against the current data versions.
356+
"""
357+
if self.options.data_version is not None:
358+
if self.data_version != self.options.data_version:
359+
raise ValueError(
360+
f"Data version {self.data_version} does not match expected version {self.options.data_version}."
361+
)

0 commit comments

Comments
 (0)