Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions policyengine_us_data/datasets/cps/cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ def add_personal_variables(cps: h5py.File, person: DataFrame) -> None:
np.random.randint(80, 85, len(person)),
person.A_AGE,
)

# A_SEX is 1 -> male, 2 -> female.
cps["is_female"] = person.A_SEX == 2
# "Is...blind or does...have serious difficulty seeing even when Wearing
Expand All @@ -241,6 +242,29 @@ def add_personal_variables(cps: h5py.File, person: DataFrame) -> None:
]
cps["is_disabled"] = (person[DISABILITY_FLAGS] == 1).any(axis=1)

def _assign_some_newborns_to_pregnancy(
    age: pd.Series, person: pd.DataFrame
) -> np.ndarray:
    """Re-draw the age of every age-0 person as a pregnancy or an infant.

    Each person whose ``person.A_AGE`` is 0 gets a fair coin flip:

    * 1 -> pregnancy: age drawn uniformly from [-0.75, 0). -0.75 stands for
      month 0 of the pregnancy and values just below 0 for month 9.
    * 0 -> newborn: age drawn uniformly from [0, 1).

    Everyone else keeps the value passed in ``age``, so adjustments the
    caller already applied to that array are preserved.

    Args:
        age: Current ages, aligned row-for-row with ``person``.
        person: Person-level frame; only the ``A_AGE`` column is read.

    Returns:
        Array of ages with the same length as ``person``.
    """
    # Fall back to the ages we were given, not to raw A_AGE; otherwise any
    # earlier adjustment made by the caller would be thrown away.
    current_age = np.asarray(age)
    n_people = len(person)

    # Draw order (coin, pregnancy ages, newborn ages) is kept stable so that
    # seeded runs stay reproducible.
    is_pregnancy = np.random.randint(0, 2, n_people)  # 0 or 1 per person
    pregnancy_age = np.random.uniform(-0.75, 0, n_people)
    newborn_age = np.random.uniform(0, 1, n_people)

    return np.where(
        person.A_AGE == 0,
        np.where(is_pregnancy, pregnancy_age, newborn_age),
        current_age,
    )

cps["age"] = _assign_some_newborns_to_pregnancy(cps["age"], person)

def children_per_parent(col: str) -> pd.DataFrame:
"""Calculate number of children in the household using parental
pointers.
Expand Down
17 changes: 17 additions & 0 deletions policyengine_us_data/tests/test_datasets/test_enhanced_cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,20 @@ def test_ecps_has_mortgage_interest():

assert sim.calculate("deductible_mortgage_interest").sum() > 1
assert sim.calculate("deductible_interest_expense").sum() > 1


def test_newborns_and_pregnancies():
    """Check the EnhancedCPS_2024 simulation has both negative and infant ages."""
    from policyengine_us_data.datasets.cps import EnhancedCPS_2024
    from policyengine_us import Microsimulation

    simulation = Microsimulation(dataset=EnhancedCPS_2024)
    age = simulation.calculate("age")

    # Unborn children carry a negative age.
    assert (age < 0).sum() > 0

    # Newborns are in their first year: 0 <= age < 1.
    is_newborn = (age >= 0) & (age < 1)
    assert is_newborn.sum() > 0
13 changes: 13 additions & 0 deletions policyengine_us_data/utils/loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,19 @@ def build_loss_matrix(dataset: type, time_period):
)
targets_array.append(row["population_under_5"])

# Number of infants (0 <= age < 1) and pregnancies (-0.75 <= age < 0)

age = sim.calculate("age").values
infants = (age >= 0) & (age < 1)
label = "census/infants"
loss_matrix[label] = sim.map_result(infants, "person", "household")
targets_array.append(3_491_679)

pregnancies = (age >= -0.75) & (age < 0)
label = "census/pregnancies"
loss_matrix[label] = sim.map_result(pregnancies, "person", "household")
targets_array.append(2_618_759)

if any(loss_matrix.isna().sum() > 0):
raise ValueError("Some targets are missing from the loss matrix")

Expand Down
Loading