Skip to content

Commit d99ba7d

Browse files
Impute pregnancy (#164)
* Impute pregnancy Fixes #162 * refactor and add test * lint * fix * minor * rename and lint
1 parent 806eb10 commit d99ba7d

File tree

3 files changed

+54
-0
lines changed

3 files changed

+54
-0
lines changed

policyengine_us_data/datasets/cps/cps.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,7 @@ def add_personal_variables(cps: h5py.File, person: DataFrame) -> None:
231231
np.random.randint(80, 85, len(person)),
232232
person.A_AGE,
233233
)
234+
234235
# A_SEX is 1 -> male, 2 -> female.
235236
cps["is_female"] = person.A_SEX == 2
236237
# "Is...blind or does...have serious difficulty seeing even when Wearing
@@ -241,6 +242,29 @@ def add_personal_variables(cps: h5py.File, person: DataFrame) -> None:
241242
]
242243
cps["is_disabled"] = (person[DISABILITY_FLAGS] == 1).any(axis=1)
243244

245+
def _assign_some_newborns_to_pregnancy(
    age: pd.Series, person: pd.DataFrame
) -> np.ndarray:
    """Reassign about half of the age-0 records to a pregnancy (negative age).

    Rows where ``person.A_AGE == 0`` receive a random fractional age. A fair
    coin decides between a value in [-0.75, 0), representing an unborn child,
    and a value in [0, 1), representing a newborn. Every other row keeps the
    value it already has in ``age``, so adjustments the caller made before
    this call are preserved.

    Args:
        age: Current ages, aligned row-for-row with ``person``.
        person: Person-level records; only the ``A_AGE`` column is read.

    Returns:
        Array of ages with one entry per row of ``person``.
    """
    n_people = len(person)
    # Fair coin per person: 1 -> pregnancy, 0 -> newborn.
    is_pregnancy = np.random.randint(0, 2, n_people).astype(bool)
    # Pregnancy ages: -0.75 is pregnancy month 0 and values just below 0
    # are pregnancy month 9.
    pregnancy_age = np.random.uniform(-0.75, 0, n_people)
    # Newborn ages are spread over the first year of life.
    newborn_age = np.random.uniform(0, 1, n_people)
    return np.where(
        person.A_AGE == 0,
        np.where(is_pregnancy, pregnancy_age, newborn_age),
        # Fall back to the ages passed in rather than the raw A_AGE column,
        # so earlier edits to ``age`` are not silently discarded.
        age,
    )
265+
266+
cps["age"] = _assign_some_newborns_to_pregnancy(cps["age"], person)
267+
244268
def children_per_parent(col: str) -> pd.DataFrame:
245269
"""Calculate number of children in the household using parental
246270
pointers.

policyengine_us_data/tests/test_datasets/test_enhanced_cps.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,20 @@ def test_ecps_has_mortgage_interest():
3737

3838
assert sim.calculate("deductible_mortgage_interest").sum() > 1
3939
assert sim.calculate("deductible_interest_expense").sum() > 1
40+
41+
42+
def test_newborns_and_pregnancies():
    """Check that the enhanced CPS holds both unborn children and newborns."""
    from policyengine_us_data.datasets.cps import EnhancedCPS_2024
    from policyengine_us import Microsimulation

    simulation = Microsimulation(dataset=EnhancedCPS_2024)

    # Unborn children are the records with a negative age.
    is_unborn = simulation.calculate("age") < 0
    assert is_unborn.sum() > 0

    # Newborns are the records with an age in [0, 1).
    at_least_zero = simulation.calculate("age") >= 0
    below_one = simulation.calculate("age") < 1
    is_newborn = at_least_zero & below_one
    assert is_newborn.sum() > 0

policyengine_us_data/utils/loss.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,19 @@ def build_loss_matrix(dataset: type, time_period):
340340
)
341341
targets_array.append(row["population_under_5"])
342342

343+
# Population counts of newborns and pregnancies
344+
345+
age = sim.calculate("age").values
346+
infants = (age >= 0) & (age < 1)
347+
label = "census/infants"
348+
loss_matrix[label] = sim.map_result(infants, "person", "household")
349+
targets_array.append(3_491_679)
350+
351+
pregnancies = (age >= -0.75) & (age < 0)
352+
label = "census/pregnancies"
353+
loss_matrix[label] = sim.map_result(pregnancies, "person", "household")
354+
targets_array.append(2_618_759)
355+
343356
if any(loss_matrix.isna().sum() > 0):
344357
raise ValueError("Some targets are missing from the loss matrix")
345358

0 commit comments

Comments
 (0)