1+ import os
2+ import yaml
3+ from importlib .resources import files
4+
15from tqdm import tqdm
26import numpy as np
37import pandas as pd
48from microdf import MicroDataFrame
9+
510from policyengine_core .data import Dataset
611from policyengine_us_data .storage import STORAGE_FOLDER
712from policyengine_us_data .datasets .puf .uprate_puf import uprate_puf
813from policyengine_us_data .datasets .puf .irs_puf import IRS_PUF_2015
914from policyengine_us_data .utils .uprating import (
1015 create_policyengine_uprating_factors_table ,
1116)
12- import os
17+
1318
1419rng = np .random .default_rng (seed = 64 )
1520
21+ # Get Qualified Business Income simulation parameters ---
22+ yamlfilename = (
23+ files ("policyengine_us_data" ) / "datasets" / "puf" / "qbi_assumptions.yaml"
24+ )
25+ with open (yamlfilename , "r" , encoding = "utf-8" ) as yamlfile :
26+ QBI_PARAMS = yaml .safe_load (yamlfile )
27+ assert isinstance (QBI_PARAMS , dict )
28+
29+
30+ # Helper functions ---
31+ def sample_bernoulli_lognormal (n , prob , log_mean , log_sigma , rng ):
32+ """Generate a Bernoulli-lognormal mixture."""
33+ positive = np .random .binomial (1 , prob , size = n )
34+ amounts = np .where (
35+ positive ,
36+ rng .lognormal (mean = log_mean , sigma = log_sigma , size = n ),
37+ 0.0 ,
38+ )
39+ return amounts
40+
41+
42+ def conditionally_sample_lognormal (flag , target_mean , log_sigma , rng ):
43+ """Generate a lognormal conditional on a binary flag."""
44+ mu = np .log (target_mean ) - (log_sigma ** 2 / 2 )
45+ return np .where (
46+ flag ,
47+ rng .lognormal (
48+ mean = mu ,
49+ sigma = log_sigma ,
50+ ),
51+ 0.0 ,
52+ )
53+
54+
55+ def simulate_w2_and_ubia_from_puf (puf , * , seed = None , diagnostics = True ):
56+ """
57+ Simulate two Section 199A guard-rail quantities for every record
58+ - W-2 wages paid by the business
59+ - Unadjusted basis immediately after acquisition (UBIA) of property
60+
61+ Parameters
62+ ----------
63+ puf : pandas.DataFrame
64+ Must contain the income columns created in your preprocessing block.
65+ seed : int, optional
66+ For reproducible random draws.
67+ diagnostics : bool, default True
68+ Print high-level checks after the simulation runs.
69+
70+ Returns
71+ -------
72+ w2_wages : 1-D NumPy array
73+ ubia : 1-D NumPy array
74+ """
75+ rng = np .random .default_rng (seed )
76+
77+ # Extract Qualified Business Income simulation parameters
78+ qbi_probs = QBI_PARAMS ["qbi_qualification_probabilities" ]
79+ margin_params = QBI_PARAMS ["profit_margin_distribution" ]
80+ logit_params = QBI_PARAMS ["has_employees_logit" ]
81+
82+ labor_params = QBI_PARAMS ["labor_ratio_distribution" ]
83+ rental_labor = labor_params ["rental" ]
84+ non_rental_labor = labor_params ["non_rental" ]
85+
86+ rental_beta_a = rental_labor ["beta_a" ]
87+ rental_beta_b = rental_labor ["beta_b" ]
88+ rental_scale = rental_labor ["scale" ]
89+
90+ non_rental_beta_a = non_rental_labor ["beta_a" ]
91+ non_rental_beta_b = non_rental_labor ["beta_b" ]
92+ non_rental_scale = non_rental_labor ["scale" ]
93+
94+ depr_sigma = QBI_PARAMS ["depreciation_proxy_sigma" ]
95+
96+ ubia_params = QBI_PARAMS ["ubia_simulation" ]
97+ ubia_multiple_of_qbi = ubia_params ["multiple_of_qbi" ]
98+ ubia_sigma = ubia_params ["sigma" ]
99+
100+ # Estimate qualified business income
101+ qbi = sum (
102+ puf [income_type ] * prob for income_type , prob in qbi_probs .items ()
103+ ).to_numpy ()
104+
105+ # Simulate gross receipts by drawing a profit margin
106+ margins = (
107+ rng .beta (margin_params ["beta_a" ], margin_params ["beta_b" ], qbi .size )
108+ * margin_params ["scale" ]
109+ + margin_params ["shift" ]
110+ )
111+ revenues = np .maximum (qbi , 0 ) / margins
112+
113+ logit = (
114+ logit_params ["intercept" ] + logit_params ["slope_per_dollar" ] * revenues
115+ )
116+
117+ # Set p = 0 when simulated receipts == 0 (no revenue means no payroll)
118+ pr_has_employees = np .where (
119+ revenues == 0.0 , 0.0 , 1.0 / (1.0 + np .exp (- logit ))
120+ )
121+ has_employees = rng .binomial (1 , pr_has_employees )
122+
123+ # Labor share simulation
124+ is_rental = puf ["rental_income" ].to_numpy () > 0
125+
126+ labor_ratios = np .where (
127+ is_rental ,
128+ rng .beta (rental_beta_a , rental_beta_b , qbi .size ) * rental_scale ,
129+ rng .beta (non_rental_beta_a , non_rental_beta_b , qbi .size )
130+ * non_rental_scale ,
131+ )
132+
133+ w2_wages = revenues * labor_ratios * has_employees
134+
135+ # A depreciation stand-in that scales with rents
136+ depreciation_proxy = conditionally_sample_lognormal (
137+ is_rental ,
138+ puf ["rental_income" ],
139+ depr_sigma ,
140+ rng ,
141+ )
142+
143+ # UBIA simulation: lognormal, but only for capital-heavy records
144+ is_capital_intensive = is_rental | (depreciation_proxy > 0 )
145+
146+ ubia = conditionally_sample_lognormal (
147+ is_capital_intensive ,
148+ ubia_multiple_of_qbi * np .maximum (qbi , 0 ),
149+ ubia_sigma ,
150+ rng ,
151+ )
152+
153+ if diagnostics :
154+ share_qbi_pos = np .mean (qbi > 0 )
155+ share_wages = np .mean ((w2_wages > 0 ) & (qbi > 0 ))
156+ print (f"Share with QBI > 0: { share_qbi_pos :6.2%} " )
157+ print (f"Among those, share with W-2 wages: { share_wages :6.2%} " )
158+ if np .any (w2_wages > 0 ):
159+ print (f"Mean W-2 (if >0): ${ np .mean (w2_wages [w2_wages > 0 ]):,.0f} " )
160+ if np .any (ubia > 0 ):
161+ print (f"Median UBIA (if >0): ${ np .median (ubia [ubia > 0 ]):,.0f} " )
162+
163+ return w2_wages , ubia
164+
16165
17166def impute_pension_contributions_to_puf (puf_df ):
18167 from policyengine_us import Microsimulation
@@ -154,8 +303,8 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
154303 puf ["educator_expense" ] = puf .E03220
155304 puf ["employment_income" ] = puf .E00200
156305 puf ["estate_income" ] = puf .E26390 - puf .E26400
306+ # Schedule J, separate from QBI
157307 puf ["farm_income" ] = puf .T27800
158- puf ["farm_rent_income" ] = puf .E27200
159308 puf ["health_savings_account_ald" ] = puf .E03290
160309 puf ["interest_deduction" ] = puf .E19200
161310 puf ["long_term_capital_gains" ] = puf .P23250
@@ -170,11 +319,21 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
170319 # that can be deducted under the miscellaneous deduction.
171320 puf ["unreimbursed_business_employee_expenses" ] = puf .E20400
172321 puf ["non_qualified_dividend_income" ] = puf .E00600 - puf .E00650
173- puf ["partnership_s_corp_income" ] = puf .E26270
174322 puf ["qualified_dividend_income" ] = puf .E00650
175323 puf ["qualified_tuition_expenses" ] = puf .E03230
176324 puf ["real_estate_taxes" ] = puf .E18500
325+ # Schedule E rent and royalty
177326 puf ["rental_income" ] = puf .E25850 - puf .E25860
327+ # Schedule E active S-Corp income
328+ s_corp_income = puf .E26190 - puf .E26180
329+ # Schedule E active partnership income
330+ partnership_income = puf .E25980 - puf .E25960
331+ puf ["partnership_s_corp_income" ] = s_corp_income + partnership_income
332+ # Schedule F active farming operations
333+ puf ["farm_operations_income" ] = puf .E02100
334+ # Schedule E farm rental income
335+ puf ["farm_rent_income" ] = puf .E27200
336+ # Schedule C Sole Proprietorship
178337 puf ["self_employment_income" ] = puf .E00900
179338 puf ["self_employed_health_insurance_ald" ] = puf .E03270
180339 puf ["self_employed_pension_contribution_ald" ] = puf .E03300
@@ -211,15 +370,38 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
211370 # Ignore f2441 (AMT form attached)
212371 # Ignore cmbtp (estimate of AMT income not in AGI)
213372 # Ignore k1bx14s and k1bx14p (partner self-employment income included in partnership and S-corp income)
214- qbi = np .maximum (0 , puf .E00900 + puf .E26270 + puf .E02100 + puf .E27200 )
215- # 10.1% passthrough rate for W2 wages hits the JCT tax expenditure target for QBID
216- # https://gist.github.com/nikhilwoodruff/262c80b8b17935d6fb8544647143b854
217- W2_WAGES_SCALE = 0.101
218- puf ["w2_wages_from_qualified_business" ] = qbi * W2_WAGES_SCALE
219373
220- # Remove aggregate records
221- puf = puf [puf .MARS != 0 ]
374+ # --- Qualified Business Income Deduction (QBID) simulation ---
375+ w2 , ubia = simulate_w2_and_ubia_from_puf (puf , seed = 42 )
376+ puf ["w2_wages_from_qualified_business" ] = w2
377+ puf ["unadjusted_basis_qualified_property" ] = ubia
222378
379+ puf_qbi_sources_for_sstb = puf [QBI_PARAMS ["sstb_prob_map_by_name" ].keys ()]
380+ largest_qbi_source_name = puf_qbi_sources_for_sstb .idxmax (axis = 1 )
381+
382+ pr_sstb = largest_qbi_source_name .map (
383+ QBI_PARAMS ["sstb_prob_map_by_name" ]
384+ ).fillna (0.0 )
385+ puf ["business_is_sstb" ] = np .random .binomial (n = 1 , p = pr_sstb )
386+
387+ reit_params = QBI_PARAMS ["reit_ptp_income_distribution" ]
388+ p_reit_ptp = reit_params ["probability_of_receiving" ]
389+ mu_reit_ptp = reit_params ["log_normal_mu" ]
390+ sigma_reit_ptp = reit_params ["log_normal_sigma" ]
391+
392+ puf ["qualified_reit_and_ptp_income" ] = sample_bernoulli_lognormal (
393+ len (puf ), p_reit_ptp , mu_reit_ptp , sigma_reit_ptp , rng
394+ )
395+
396+ bdc_params = QBI_PARAMS ["bdc_income_distribution" ]
397+ p_bdc = bdc_params ["probability_of_receiving" ]
398+ mu_bdc = bdc_params ["log_normal_mu" ]
399+ sigma_bdc = bdc_params ["log_normal_sigma" ]
400+
401+ puf ["qualified_bdc_income" ] = sample_bernoulli_lognormal (
402+ len (puf ), p_bdc , mu_bdc , sigma_bdc , rng
403+ )
404+ # -------- End of Qualified Business Income Deduction (QBID) -------
223405 puf ["filing_status" ] = puf .MARS .map (
224406 {
225407 1 : "SINGLE" ,
@@ -248,6 +430,7 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
248430 "educator_expense" ,
249431 "employment_income" ,
250432 "estate_income" ,
433+ "farm_operations_income" ,
251434 "farm_income" ,
252435 "farm_rent_income" ,
253436 "health_savings_account_ald" ,
@@ -257,7 +440,6 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
257440 "unreimbursed_business_employee_expenses" ,
258441 "non_qualified_dividend_income" ,
259442 "non_sch_d_capital_gains" ,
260- "partnership_s_corp_income" ,
261443 "qualified_dividend_income" ,
262444 "qualified_tuition_expenses" ,
263445 "real_estate_taxes" ,
@@ -293,7 +475,12 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
293475 "unreported_payroll_tax" ,
294476 "pre_tax_contributions" ,
295477 "w2_wages_from_qualified_business" ,
478+ "unadjusted_basis_qualified_property" ,
479+ "business_is_sstb" ,
296480 "deductible_mortgage_interest" ,
481+ "partnership_s_corp_income" ,
482+ "qualified_reit_and_ptp_income" ,
483+ "qualified_bdc_income" ,
297484]
298485
299486
0 commit comments