@@ -213,6 +213,71 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
213213 # 10.1% passthrough rate for W2 wages hits the JCT tax expenditure target for QBID
214214 # https://gist.github.com/nikhilwoodruff/262c80b8b17935d6fb8544647143b854
215215
216+ regr_df = pd .DataFrame ({
217+ 'y_sched_c_00900' : puf .E00900 ,
218+ 'y_sched_e_02000' : puf .E02000 ,
219+ 'y_sched_f_02100' : puf .E02100 ,
220+ 'y_sched_k1_26270' : puf .E26270 ,
221+ 'x_farm_rent_27200' : puf .E27200 ,
222+ 'x_rent_royalty_inc_loss_25700' : puf .P25700 ,
223+ 'x_rent_royalty_inc_25850' : puf .E25850 , # Strictly positive
224+ 'x_rent_royalty_loss_25860' : puf .E25860 , # Strictly positive
225+ 'x_estate_income_26390' : puf .E26390 ,
226+ 'x_estate_loss_26400' : puf .E26400 ,
227+ # End of the variables Max mentioned
228+ 'z_health_insurance_deduction_03270' : puf .E03270 , # apparently this is relevant to QBI
229+ 'x_total_partnership_passive_income_25940' : puf .E25940 , # Should not count
230+ 'x_total_partnership_nonpassive_income_25980' : puf .E25980 , # Counts towards QBI
231+ 'x_total_partnership_passive_loss_25920' : puf .E25920 , # Should not count towards QBI
232+ 'x_total_partnership_nonpassive_loss_25960' : puf .E25960 , # Counts towards QBI
233+ 'z_partnership_sec179_deduction_26110' : puf .E26110 , # Some of it will count
234+ 'x_smallbiz_total_passive_income_26170' : puf .E26170 , # Should not count
235+ 'x_smallbiz_total_nonpassive_income_26190' : puf .E26190 , # Should count
236+ 'x_smallbiz_total_passive_loss_26160' : puf .E26160 , # Should not count
237+ 'x_smallbiz_total_nonpassive_loss_26180' : puf .E26180 , # Should count
238+ 'z_smallbiz_sec179_deduction_26100' : puf .E26100 # Some of it will count
239+ })
240+
241+
242+ regr_df .x_rent_royalty_inc_25850
243+ regr_df .x_rent_royalty_loss_25860
244+ regr_df .x_rent_royalty_inc_loss_25700
245+ np .corrcoef (regr_df .x_rent_royalty_inc_loss_25700 ,
246+ regr_df .x_rent_royalty_inc_25850 - regr_df .x_rent_royalty_loss_25860 )
247+
248+ #'y_sched_c_00900'
249+ #'y_sched_e_02000'
250+ #'y_sched_f_02100'
251+ #'y_sched_k1_26270'
252+
253+ y_variable_to_regress = 'y_sched_e_02000'
254+ x_predictor_variables = [
255+ 'x_farm_rent_27200' ,
256+ 'x_rent_royalty_inc_loss_25700' ,
257+ 'x_rent_royalty_inc_25850' ,
258+ 'x_rent_royalty_loss_25860' ,
259+ 'x_estate_income_26390' ,
260+ 'x_estate_loss_26400' ,
261+ 'x_total_partnership_passive_income_25940' ,
262+ 'x_total_partnership_nonpassive_income_25980' ,
263+ 'x_total_partnership_passive_loss_25920' ,
264+ 'x_total_partnership_nonpassive_loss_25960' ,
265+ 'x_smallbiz_total_passive_income_26170' ,
266+ 'x_smallbiz_total_nonpassive_income_26190' ,
267+ 'x_smallbiz_total_passive_loss_26160' ,
268+ 'x_smallbiz_total_nonpassive_loss_26180'
269+ ]
270+ Y_target = regr_df [y_variable_to_regress ]
271+
272+ import statsmodels .api as sm
273+ X_data = regr_df [x_predictor_variables ].copy ()
274+ X_data_with_const = sm .add_constant (X_data , has_constant = 'add' )
275+
276+ model = sm .OLS (Y_target , X_data_with_const , missing = 'drop' )
277+ results = model .fit ()
278+ print (f"--------Y: { y_variable_to_regress } ----------" )
279+ results .summary ()
280+
216281 # wages simulation
217282 MIN_MARGIN = .03 # Minimum profit margin
218283 MAX_MARGIN = .15 # Maximum profit margin
@@ -234,6 +299,9 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
234299
235300 puf ["w2_wages_from_qualified_business" ] = hypothetical_w2_gross_income * has_w2_employees
236301
302+ # TODO: remove eventually (I think)
303+ puf ["qbi" ] = qbi
304+
237305 #W2_WAGES_SCALE = 0.101
238306 #puf["w2_wages_from_qualified_business"] = qbi * W2_WAGES_SCALE
239307
@@ -379,6 +447,7 @@ def estimate_ubia_from_depreciation(depreciation_amount, business_type=None):
379447 "unreported_payroll_tax" ,
380448 "pre_tax_contributions" ,
381449 "w2_wages_from_qualified_business" ,
450+ "qbi" , # TODO: temporary
382451 "unadjusted_basis_qualified_property" ,
383452 "business_is_sstb" ,
384453 "deductible_mortgage_interest" ,
0 commit comments