Skip to content

Commit 47d0097

Browse files
Small ECPS is too large (#308)
* Small CPS is too large
  Fixes #307
* Rename to ECPS
1 parent 4dd3388 commit 47d0097

File tree

5 files changed

+12
-7
lines changed

5 files changed

+12
-7
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ data:
4242
python policyengine_us_data/datasets/puf/puf.py
4343
python policyengine_us_data/datasets/cps/extended_cps.py
4444
python policyengine_us_data/datasets/cps/enhanced_cps.py
45-
python policyengine_us_data/datasets/cps/small_cps.py
45+
python policyengine_us_data/datasets/cps/small_enhanced_cps.py
4646

4747
clean:
4848
rm -f policyengine_us_data/storage/*.h5

changelog_entry.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
- bump: patch
2+
changes:
3+
fixed:
4+
- Small CPS is now 1000 households.

policyengine_us_data/datasets/cps/small_cps.py renamed to policyengine_us_data/datasets/cps/small_enhanced_cps.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import numpy as np
22

33

4-
def create_small_cps():
4+
def create_small_ecps():
55
from policyengine_us import Microsimulation
66
from policyengine_us_data.datasets import EnhancedCPS_2024
77
from policyengine_us_data.storage import STORAGE_FOLDER
@@ -10,6 +10,7 @@ def create_small_cps():
1010
simulation = Microsimulation(
1111
dataset=EnhancedCPS_2024,
1212
)
13+
simulation.subsample(1_000)
1314

1415
data = {}
1516
for variable in simulation.tax_benefit_system.variables:
@@ -29,13 +30,13 @@ def create_small_cps():
2930

3031
import h5py
3132

32-
with h5py.File(STORAGE_FOLDER / "small_cps_2024.h5", "w") as f:
33+
with h5py.File(STORAGE_FOLDER / "small_enhanced_cps_2024.h5", "w") as f:
3334
for variable, periods in data.items():
3435
grp = f.create_group(variable)
3536
for period, values in periods.items():
3637
grp.create_dataset(str(period), data=values)
3738

3839

3940
if __name__ == "__main__":
40-
create_small_cps()
41+
create_small_ecps()
4142
print("Small CPS dataset created successfully.")

policyengine_us_data/storage/upload_completed_datasets.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def upload_datasets():
1414
EnhancedCPS_2024.file_path,
1515
Pooled_3_Year_CPS_2023.file_path,
1616
CPS_2023.file_path,
17-
STORAGE_FOLDER / "small_cps_2024.h5",
17+
STORAGE_FOLDER / "small_enhanced_cps_2024.h5",
1818
]
1919

2020
for file_path in dataset_files:

policyengine_us_data/tests/test_datasets/test_small_cps.py renamed to policyengine_us_data/tests/test_datasets/test_small_enhanced_cps.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,14 @@
33

44

55
@pytest.mark.parametrize("year", [2024])
6-
def test_small_cps_loads(year: int):
6+
def test_small_ecps_loads(year: int):
77
from policyengine_core.data import Dataset
88
from policyengine_us_data.storage import STORAGE_FOLDER
99
from policyengine_us import Microsimulation
1010

1111
sim = Microsimulation(
1212
dataset=Dataset.from_file(
13-
STORAGE_FOLDER / f"small_cps_{year}.h5",
13+
STORAGE_FOLDER / f"small_enhanced_cps_{year}.h5",
1414
)
1515
)
1616

0 commit comments

Comments
 (0)