Skip to content

Commit 4a395f8

Browse files
authored
Merge pull request #398 from PolicyEngine/treasury
A SQLite Database for Calibration Targets
2 parents 4a96cfc + 2875311 commit 4a395f8

19 files changed

+3469
-337
lines changed

.github/workflows/reusable_test.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,10 @@ jobs:
5858
if: inputs.full_suite
5959
run: make download
6060

61+
- name: Create and load calibration targets database
62+
if: inputs.full_suite
63+
run: make database
64+
6165
- name: Build datasets
6266
if: inputs.full_suite
6367
run: make data
@@ -90,4 +94,4 @@ jobs:
9094
with:
9195
branch: gh-pages
9296
folder: docs/_build/html
93-
clean: true
97+
clean: true

Makefile

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,6 @@ changelog:
2222
download:
2323
python policyengine_us_data/storage/download_private_prerequisites.py
2424

25-
targets:
26-
python policyengine_us_data/storage/calibration_targets/pull_hardcoded_targets.py
27-
python policyengine_us_data/storage/calibration_targets/pull_age_targets.py
28-
python policyengine_us_data/storage/calibration_targets/pull_soi_targets.py
29-
python policyengine_us_data/storage/calibration_targets/pull_snap_targets.py
30-
3125
upload:
3226
python policyengine_us_data/storage/upload_completed_datasets.py
3327

@@ -61,10 +55,12 @@ documentation-dev:
6155

6256
database:
6357
python policyengine_us_data/db/create_database_tables.py
64-
python policyengine_us_data/db/load_age_targets.py
65-
66-
clean-database:
67-
rm *.db
58+
python policyengine_us_data/db/create_initial_strata.py
59+
python policyengine_us_data/db/etl_age.py
60+
python policyengine_us_data/db/etl_medicaid.py
61+
python policyengine_us_data/db/etl_snap.py
62+
python policyengine_us_data/db/etl_irs_soi.py
63+
python policyengine_us_data/db/validate_database.py
6864

6965
data:
7066
python policyengine_us_data/utils/uprating.py
@@ -80,6 +76,7 @@ data:
8076

8177
clean:
8278
rm -f policyengine_us_data/storage/*.h5
79+
rm -f policyengine_us_data/storage/*.db
8380
git clean -fX -- '*.csv'
8481
rm -rf policyengine_us_data/docs/_build
8582

changelog_entry.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
- bump: minor
2+
changes:
3+
added:
4+
- add SQLite database for calibration targets

policyengine_us_data/db/create_database_tables.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,20 @@
11
import logging
22
import hashlib
33
from typing import List, Optional
4+
from enum import Enum
45

56
from sqlalchemy import event, UniqueConstraint
67
from sqlalchemy.orm.attributes import get_history
7-
88
from sqlmodel import (
99
Field,
1010
Relationship,
1111
SQLModel,
1212
create_engine,
1313
)
14+
from policyengine_us.system import system
15+
16+
from policyengine_us_data.storage import STORAGE_FOLDER
17+
1418

1519
logging.basicConfig(
1620
level=logging.INFO,
@@ -20,6 +24,12 @@
2024
logger = logging.getLogger(__name__)
2125

2226

27+
# String-valued Enum with one member per variable name registered in the
# policyengine-us system; each member's value is its own name. Typing a
# model field with it restricts that field to names that exist in
# policyengine-us.
USVariable = Enum(
    "USVariable",
    [(name, name) for name in system.variables.keys()],
    type=str,
)
31+
32+
2333
class Stratum(SQLModel, table=True):
2434
"""Represents a unique population subgroup (stratum)."""
2535

@@ -79,7 +89,7 @@ class StratumConstraint(SQLModel, table=True):
7989
__tablename__ = "stratum_constraints"
8090

8191
stratum_id: int = Field(foreign_key="strata.stratum_id", primary_key=True)
82-
constraint_variable: str = Field(
92+
constraint_variable: USVariable = Field(
8393
primary_key=True,
8494
description="The variable the constraint applies to (e.g., 'age').",
8595
)
@@ -112,7 +122,7 @@ class Target(SQLModel, table=True):
112122
)
113123

114124
target_id: Optional[int] = Field(default=None, primary_key=True)
115-
variable: str = Field(
125+
variable: USVariable = Field(
116126
description="A variable defined in policyengine-us (e.g., 'income_tax')."
117127
)
118128
period: int = Field(
@@ -171,12 +181,11 @@ def calculate_definition_hash(mapper, connection, target: Stratum):
171181
fingerprint_text = "\n".join(constraint_strings)
172182
h = hashlib.sha256(fingerprint_text.encode("utf-8"))
173183
target.definition_hash = h.hexdigest()
174-
logger.info(
175-
f"Set definition_hash for Stratum to '{target.definition_hash}'"
176-
)
177184

178185

179-
def create_database(db_uri="sqlite:///policy_data.db"):
186+
def create_database(
187+
db_uri: str = f"sqlite:///{STORAGE_FOLDER / 'policy_data.db'}",
188+
):
180189
"""
181190
Creates a SQLite database and all the defined tables.
182191
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
from typing import Dict
2+
3+
import pandas as pd
4+
from sqlmodel import Session, create_engine
5+
6+
from policyengine_us_data.storage import STORAGE_FOLDER
7+
8+
9+
from policyengine_us.variables.household.demographic.geographic.ucgid.ucgid_enum import (
10+
UCGID,
11+
)
12+
from policyengine_us_data.db.create_database_tables import (
13+
Stratum,
14+
StratumConstraint,
15+
)
16+
17+
18+
def main():
    """Insert one geographic stratum per UCGID enum member into the database.

    Each member's hierarchical codes give its own code (first entry) and,
    when present, its parent's code (second entry). Rows are sorted so that
    members without a parent come first, then written one at a time so that
    a child can reference the stratum_id already generated for its parent.
    Every stratum receives a single ``ucgid_str in <code>`` constraint and
    is placed in stratum group 1.
    """
    # Flatten the UCGID enum into (name, code, parent) records.
    records = []
    for member in UCGID:
        lineage = member.get_hierarchical_codes()
        parent = lineage[1] if len(lineage) > 1 else None
        records.append(
            {"name": member.name, "code": lineage[0], "parent": parent}
        )

    # Parent-less rows sort first; the rest are ordered by parent, then code.
    hierarchy_df = pd.DataFrame(records)
    hierarchy_df = hierarchy_df.sort_values(
        ["parent", "code"], na_position="first"
    )
    hierarchy_df = hierarchy_df.reset_index(drop=True)

    engine = create_engine(f"sqlite:///{STORAGE_FOLDER / 'policy_data.db'}")

    # ucgid code -> auto-generated stratum_id of the stratum created for it
    stratum_id_by_code: Dict[str, int] = {}

    with Session(engine) as session:
        for _, row in hierarchy_df.iterrows():
            parent_code = row["parent"]

            # A missing parent code, or one not recorded yet, yields None.
            parent_id = None
            if parent_code:
                parent_id = stratum_id_by_code.get(parent_code)

            new_stratum = Stratum(
                parent_stratum_id=parent_id,
                notes=f'{row["name"]} (ucgid {row["code"]})',
                stratum_group_id=1,
            )
            geo_constraint = StratumConstraint(
                constraint_variable="ucgid_str",
                operation="in",
                value=row["code"],
            )
            new_stratum.constraints_rel = [geo_constraint]

            session.add(new_stratum)
            # Flush so the generated stratum_id is available for children.
            session.flush()

            stratum_id_by_code[row["code"]] = new_stratum.stratum_id

        session.commit()
73+
74+
if __name__ == "__main__":
75+
main()

0 commit comments

Comments
 (0)