1414from policyengine_us_data .utils import QRF
1515import logging
1616
17+ test_lite = os .environ .get ("TEST_LITE" )
18+
1719
1820class CPS (Dataset ):
1921 name = "cps"
@@ -49,21 +51,33 @@ def generate(self):
4951 raw_data [entity ] for entity in ENTITIES
5052 ]
5153
54+ logging .info ("Adding ID variables" )
5255 add_id_variables (cps , person , tax_unit , family , spm_unit , household )
56+ logging .info ("Adding personal variables" )
5357 add_personal_variables (cps , person )
58+ logging .info ("Adding personal income variables" )
5459 add_personal_income_variables (cps , person , self .raw_cps .time_period )
60+ logging .info ("Adding previous year income variables" )
5561 add_previous_year_income (self , cps )
62+ logging .info ("Adding SSN card type" )
5663 add_ssn_card_type (cps , person )
64+ logging .info ("Adding family variables" )
5765 add_spm_variables (cps , spm_unit )
66+ logging .info ("Adding household variables" )
5867 add_household_variables (cps , household )
68+ logging .info ("Adding rent" )
5969 add_rent (self , cps , person , household )
70+ logging .info ("Adding auto loan balance" )
6071 add_auto_loan_balance (self , cps )
72+ logging .info ("Adding tips" )
6173 add_tips (self , cps )
74+ logging .info ("Added all variables" )
6275
6376 raw_data .close ()
6477 self .save_dataset (cps )
65-
78+ logging . info ( "Adding takeup" )
6679 add_takeup (self )
80+ logging .info ("Downsampling" )
6781
6882 # Downsample
6983 if self .frac is not None and self .frac < 1.0 :
@@ -146,7 +160,9 @@ def add_rent(self, cps: h5py.File, person: DataFrame, household: DataFrame):
146160 },
147161 na_action = "ignore" ,
148162 ).fillna (train_df .tenure_type )
149- train_df = train_df [train_df .is_household_head ].sample (100_000 )
163+ train_df = train_df [train_df .is_household_head ].sample (
164+ 100_000 if not test_lite else 1_000
165+ )
150166 inference_df = cps_sim .calculate_dataframe (PREDICTORS )
151167 mask = inference_df .is_household_head .values
152168 inference_df = inference_df [mask ]
@@ -290,7 +306,7 @@ def add_auto_loan_balance(self, cps: h5py.File) -> None:
290306 donor_data = donor_data .loc [
291307 np .random .choice (
292308 donor_data .index ,
293- size = 100_000 ,
309+ size = 100_000 if not test_lite else 1_000 ,
294310 replace = True ,
295311 p = donor_data .household_weight / donor_data .household_weight .sum (),
296312 )
@@ -303,7 +319,7 @@ def add_auto_loan_balance(self, cps: h5py.File) -> None:
303319 X_train = donor_data ,
304320 predictors = PREDICTORS ,
305321 imputed_variables = IMPUTED_VARIABLES ,
306- tune_hyperparameters = True ,
322+ tune_hyperparameters = not test_lite ,
307323 )
308324
309325 imputations = fitted_model .predict (X_test = receiver_data )
0 commit comments