11"""Simulate tax-benefit policy and derive society-level output statistics."""
2-
2+
33from pydantic import BaseModel , Field
44from typing import Literal
55from .constants import DEFAULT_DATASETS_BY_COUNTRY
1010from .utils .reforms import ParametricReform
1111from policyengine_core .reforms import Reform as StructuralReform
1212from policyengine_core .data import Dataset
13- from .utils .huggingface import download
1413from policyengine_us import (
1514 Simulation as USSimulation ,
1615 Microsimulation as USMicrosimulation ,
2625from functools import wraps , partial
2726from typing import Dict , Any , Callable
2827import importlib
28+ from policyengine .utils .data_download import download
2929
3030CountryType = Literal ["uk" , "us" ]
3131ScopeType = Literal ["household" , "macro" ]
@@ -78,6 +78,7 @@ def __init__(self, **options: SimulationOptions):
7878 self .options .country
7979 ]
8080
81+ self ._set_data ()
8182 self ._initialise_simulations ()
8283 self ._add_output_functions ()
8384
@@ -118,7 +119,36 @@ def _set_data(self):
118119 self .options .country
119120 ]
120121
121- self ._data_handle_cps_special_case ()
122+ if isinstance (self .options .data , str ):
123+ filename = self .options .data
124+ if "://" in self .options .data :
125+ bucket = None
126+ hf_repo = None
127+ hf_org = None
128+ if "gs://" in self .options .data :
129+ bucket , filename = self .options .data .split ("://" )[
130+ - 1
131+ ].split ("/" )
132+ elif "hf://" in self .options .data :
133+ hf_org , hf_repo , filename = self .options .data .split ("://" )[
134+ - 1
135+ ].split ("/" , 2 )
136+
137+ if not Path (filename ).exists ():
138+ file_path = download (
139+ filepath = filename ,
140+ huggingface_org = hf_org ,
141+ huggingface_repo = hf_repo ,
142+ gcs_bucket = bucket ,
143+ )
144+ filename = str (Path (file_path ))
145+ if "cps_2023" in filename :
146+ time_period = 2023
147+ else :
148+ time_period = None
149+ self .options .data = Dataset .from_file (
150+ filename , time_period = time_period
151+ )
122152
123153 def _initialise_simulations (self ):
124154 self .baseline_simulation = self ._initialise_simulation (
@@ -228,10 +258,9 @@ def _apply_region_to_simulation(
228258 elif "constituency/" in region :
229259 constituency = region .split ("/" )[1 ]
230260 constituency_names_file_path = download (
231- repo = "policyengine/policyengine-uk-data" ,
232- repo_filename = "constituencies_2024.csv" ,
233- local_folder = None ,
234- version = None ,
261+ huggingface_repo = "policyengine-uk-data" ,
262+ gcs_bucket = "policyengine-uk-data-private" ,
263+ filepath = "constituencies_2024.csv" ,
235264 )
236265 constituency_names_file_path = Path (
237266 constituency_names_file_path
@@ -250,10 +279,9 @@ def _apply_region_to_simulation(
250279 f"Constituency { constituency } not found. See { constituency_names_file_path } for the list of available constituencies."
251280 )
252281 weights_file_path = download (
253- repo = "policyengine/policyengine-uk-data" ,
254- repo_filename = "parliamentary_constituency_weights.h5" ,
255- local_folder = None ,
256- version = None ,
282+ huggingface_repo = "policyengine-uk-data" ,
283+ gcs_bucket = "policyengine-uk-data-private" ,
284+ filepath = "parliamentary_constituency_weights.h5" ,
257285 )
258286
259287 with h5py .File (weights_file_path , "r" ) as f :
@@ -267,10 +295,9 @@ def _apply_region_to_simulation(
267295 elif "local_authority/" in region :
268296 la = region .split ("/" )[1 ]
269297 la_names_file_path = download (
270- repo = "policyengine/policyengine-uk-data" ,
271- repo_filename = "local_authorities_2021.csv" ,
272- local_folder = None ,
273- version = None ,
298+ huggingface_repo = "policyengine-uk-data" ,
299+ gcs_bucket = "policyengine-uk-data-private" ,
300+ filepath = "local_authorities_2021.csv" ,
274301 )
275302 la_names_file_path = Path (la_names_file_path )
276303 la_names = pd .read_csv (la_names_file_path )
@@ -283,10 +310,9 @@ def _apply_region_to_simulation(
283310 f"Local authority { la } not found. See { la_names_file_path } for the list of available local authorities."
284311 )
285312 weights_file_path = download (
286- repo = "policyengine/policyengine-uk-data" ,
287- repo_filename = "local_authority_weights.h5" ,
288- local_folder = None ,
289- version = None ,
313+ huggingface_repo = "policyengine-uk-data" ,
314+ gcs_bucket = "policyengine-uk-data-private" ,
315+ filepath = "local_authority_weights.h5" ,
290316 )
291317
292318 with h5py .File (weights_file_path , "r" ) as f :
@@ -299,21 +325,3 @@ def _apply_region_to_simulation(
299325 )
300326
301327 return simulation
302-
303- def _data_handle_cps_special_case (self ):
304- """Handle special case for CPS data- this data doesn't specify time periods for each variable, but we still use it intensively."""
305- if self .data is not None and "cps_2023" in self .data :
306- if "hf://" in self .data :
307- owner , repo , filename = self .data .split ("/" )[- 3 :]
308- if "@" in filename :
309- version = filename .split ("@" )[- 1 ]
310- filename = filename .split ("@" )[0 ]
311- else :
312- version = None
313- self .data = download (
314- repo = owner + "/" + repo ,
315- repo_filename = filename ,
316- local_folder = None ,
317- version = version ,
318- )
319- self .data = Dataset .from_file (self .data , "2023" )
0 commit comments