Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
cd6cf58
first round of eitc targets are added
baogorek Jul 29, 2025
867bec6
linting
baogorek Jul 29, 2025
c2dd4af
changelog_entry.yaml
baogorek Jul 29, 2025
9f7f674
merging main
baogorek Jul 29, 2025
92979ca
Merge branch 'main' of github.com:PolicyEngine/policyengine-us-data i…
baogorek Aug 2, 2025
95a4a9a
new file in progress
baogorek Aug 2, 2025
6fd3542
moving to QBID and SALT
baogorek Aug 7, 2025
c73ef87
new variables added
baogorek Aug 7, 2025
57d9850
medicaid etl file
baogorek Aug 8, 2025
33cf8e9
merging main
baogorek Aug 11, 2025
9c4838e
medicaid is loading in
baogorek Aug 11, 2025
57716f2
medicaid and some SNAP data
baogorek Aug 12, 2025
7b3cacc
got SNAP settled
baogorek Aug 12, 2025
e45072e
progress
baogorek Aug 12, 2025
6d482e7
all major targets loaded
baogorek Aug 14, 2025
dddf689
linting
baogorek Aug 14, 2025
9c3a460
fixed national stratum in agi script
baogorek Aug 15, 2025
81e2011
refactor: use sqlmodel session
baogorek Aug 15, 2025
d5b3571
storage file updates
baogorek Aug 15, 2025
a729450
Merge branch 'treasury' into codex/fix-orm-inconsistencies-and-improv…
baogorek Aug 15, 2025
26b561b
Merge pull request #430 from PolicyEngine/codex/fix-orm-inconsistenci…
baogorek Aug 15, 2025
a1de133
Store policy database in storage folder
baogorek Aug 15, 2025
b7867db
Merge pull request #431 from PolicyEngine/codex/update-storage-path-f…
baogorek Aug 15, 2025
b376726
adding make database to reusable test. Updating changelog_entry
baogorek Aug 15, 2025
9078ed9
removing TODOs
baogorek Aug 15, 2025
9913e3c
Removed troublesome logging. Updated Makefile
baogorek Aug 15, 2025
fddc3ac
updated comments based on feedback. Removed old make target
baogorek Aug 15, 2025
0cf920a
test: move database tests into package
baogorek Aug 18, 2025
35f78cd
Merge pull request #433 from PolicyEngine/codex/add-tests-for-policye…
baogorek Aug 18, 2025
0571ff5
Add Great Expectations validation for database
baogorek Aug 18, 2025
648eabf
Merge pull request #434 from PolicyEngine/codex/add-great-expectation…
baogorek Aug 18, 2025
bdef501
working pre lint
baogorek Aug 18, 2025
d295926
post lint
baogorek Aug 18, 2025
bd104d0
updating IRS target variables
baogorek Aug 20, 2025
2875311
changing the salt variable to uncapped
baogorek Aug 20, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
- bump: minor
  changes:
    added:
      - load script for eitc targets
22 changes: 11 additions & 11 deletions policyengine_us_data/db/load_age_targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,18 +174,18 @@ def transform_age_data(age_data, docs):
)

df = df.drop(columns="NAME")
df = df.rename({"GEO_ID": "ucgid"}, axis=1)
df_data = df.rename(columns=rename_mapping)[["ucgid"] + list(AGE_COLS)]
df = df.rename({"GEO_ID": "ucgid_str"}, axis=1)
df_data = df.rename(columns=rename_mapping)[["ucgid_str"] + list(AGE_COLS)]

# Filter out Puerto Rico's district and state records, if needed
df_geos = df_data[
~df_data["ucgid"].isin(["5001800US7298", "0400000US72"])
~df_data["ucgid_str"].isin(["5001800US7298", "0400000US72"])
].copy()

df = df_geos[["ucgid"] + AGE_COLS]
df = df_geos[["ucgid_str"] + AGE_COLS]

df_long = df.melt(
id_vars="ucgid",
id_vars="ucgid_str",
value_vars=AGE_COLS,
var_name="age_range",
value_name="value",
Expand All @@ -212,11 +212,11 @@ def load_age_data(df_long, geo, stratum_lookup={}):

# Quick data quality check before loading ----
if geo == "National":
assert len(set(df_long.ucgid)) == 1
assert len(set(df_long.ucgid_str)) == 1
elif geo == "State":
assert len(set(df_long.ucgid)) == 51
assert len(set(df_long.ucgid_str)) == 51
elif geo == "District":
assert len(set(df_long.ucgid)) == 436
assert len(set(df_long.ucgid_str)) == 436
else:
raise ValueError('geo must be one of "National", "State", "District"')

Expand All @@ -238,7 +238,7 @@ def load_age_data(df_long, geo, stratum_lookup={}):

# Create the parent Stratum object.
# We will attach children to it before adding it to the session.
note = f"Age: {row['age_range']}, Geo: {row['ucgid']}"
note = f"Age: {row['age_range']}, Geo: {row['ucgid_str']}"
parent_geo = get_parent_geo(geo)
parent_stratum_id = (
stratum_lookup[parent_geo][row["age_range"]]
Expand All @@ -253,9 +253,9 @@ def load_age_data(df_long, geo, stratum_lookup={}):
# Create constraints and link them to the parent's relationship attribute.
new_stratum.constraints_rel = [
StratumConstraint(
constraint_variable="ucgid",
constraint_variable="ucgid_str",
operation="equals",
value=row["ucgid"],
value=row["ucgid_str"],
),
StratumConstraint(
constraint_variable="age",
Expand Down
Loading