Skip to content

Commit c94efad

Browse files
Impute pregnancy
Fixes #162
1 parent f8d6a50 commit c94efad

File tree

2 files changed

+31
-1
lines changed

2 files changed

+31
-1
lines changed

policyengine_us_data/datasets/cps/cps.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,8 +229,23 @@ def add_personal_variables(cps: h5py.File, person: DataFrame) -> None:
229229
person.A_AGE == 80,
230230
# NB: randint is inclusive of first argument, exclusive of second.
231231
np.random.randint(80, 85, len(person)),
232-
person.A_AGE,
232+
np.where(
233+
person.A_AGE == 0,
234+
np.where(
235+
np.random.randint(0, 2, len(person)), # Random number of 0 or 1
236+
# If 1 is flipped, select a random number between -0.75 and 0
237+
# This will represent the pregnancy month
238+
# At -0.75 the pregnancy month is 0 and at -0.0001 the pregnancy month is 9
239+
np.random.uniform(-0.75, 0, len(person)),
240+
# If 0 is flipped, the child is a newborn at the age of 0 to 1
241+
np.random.uniform(0, 1, len(person)),
242+
),
243+
person.A_AGE,
244+
)
233245
)
246+
cps["is_pregnant"] = (cps["age"] >= -0.75) & (cps["age"] < 0)
247+
cps["is_newborn"] = (cps["age"] >= 0) & (cps["age"] < 1)
248+
234249
# A_SEX is 1 -> male, 2 -> female.
235250
cps["is_female"] = person.A_SEX == 2
236251
# "Is...blind or does...have serious difficulty seeing even when Wearing

policyengine_us_data/utils/loss.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,8 @@ def build_loss_matrix(dataset: type, time_period):
254254
# Rough estimate, not CPS derived
255255
"real_estate_taxes": 400e9, # Rough estimate between 350bn and 600bn total property tax collections
256256
"rent": 735e9, # ACS total uprated by CPI
257+
"is_newborn": 3_491_679, # ACS total of people aged 0
258+
"is_pregnant": 2_618_759, # 75% of the ACS total of people aged 0
257259
}
258260

259261
for variable_name, target in HARD_CODED_TOTALS.items():
@@ -340,6 +342,19 @@ def build_loss_matrix(dataset: type, time_period):
340342
)
341343
targets_array.append(row["population_under_5"])
342344

345+
# Population by number of newborns and pregnancies
346+
347+
age = sim.calculate("age").values
348+
newborns = (age >= 0) & (age < 1)
349+
label = "census/newborns"
350+
loss_matrix[label] = sim.map_result(newborns, "person", "household")
351+
targets_array.append(HARD_CODED_TOTALS["is_newborn"])
352+
353+
pregnancies = (age >= -0.75) & (age < 0)
354+
label = "census/pregnancies"
355+
loss_matrix[label] = sim.map_result(pregnancies, "person", "household")
356+
targets_array.append(HARD_CODED_TOTALS["is_pregnant"])
357+
343358
if any(loss_matrix.isna().sum() > 0):
344359
raise ValueError("Some targets are missing from the loss matrix")
345360

0 commit comments

Comments
 (0)