Skip to content

Commit 2092e75

Browse files
authored
Merge pull request #256 from PolicyEngine/qbid-suite
QBID variables added, simulations improved, changelog entry workflow logic
2 parents c09ff1f + de0ef90 commit 2092e75

File tree

8 files changed

+321
-13
lines changed

8 files changed

+321
-13
lines changed

.github/check-changelog-entry.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/usr/bin/env bash
2+
3+
# Fails if changelog_entry.yaml is empty or contains only whitespace
4+
if [ ! -s changelog_entry.yaml ] || ! grep -q '[^[:space:]]' changelog_entry.yaml; then
5+
echo "changelog_entry.yaml is empty. Please add a changelog entry before merging."
6+
exit 1
7+
fi
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
name: Changelog entry
2+
on:
3+
pull_request:
4+
branches: [main]
5+
6+
jobs:
7+
require-entry:
8+
runs-on: ubuntu-latest
9+
steps:
10+
- uses: actions/checkout@v4
11+
with:
12+
fetch-depth: 0
13+
- name: Ensure changelog entry exists
14+
run: .github/check-changelog-entry.sh

.gitignore

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
**/.DS_STORE
44
**/*.h5
55
**/*.csv
6-
!uprating_factors.csv
7-
!uprating_growth_factors.csv
86
!healthcare_spending.csv
97
!eitc.csv
108
!spm_threshold_agi.csv

changelog_entry.yaml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
- bump: minor
2+
changes:
3+
added:
4+
- Added automated checks for changelog entry
5+
- New "would be qualified income" variables simulated
6+
- REIT, PTP, and BDC dividend income variables simulated
7+
- UBIA property is being simulated
8+
- Farm Operations Income added
9+
changed:
10+
- W2 Wages from Qualified business is now being simulated with random variables
11+
- qualified business income sources have been redefined based on IRS PUF inputs

policyengine_us_data/datasets/cps/cps.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from policyengine_us_data.utils import QRF
1616
import logging
1717

18+
1819
test_lite = os.environ.get("TEST_LITE")
1920

2021

@@ -570,6 +571,21 @@ def add_personal_income_variables(
570571
cps["other_medical_expenses"] = person.PMED_VAL
571572
cps["medicare_part_b_premiums"] = person.PEMCPREM
572573

574+
# Get QBI simulation parameters ---
575+
yamlfilename = (
576+
files("policyengine_us_data")
577+
/ "datasets"
578+
/ "puf"
579+
/ "qbi_assumptions.yaml"
580+
)
581+
with open(yamlfilename, "r", encoding="utf-8") as yamlfile:
582+
p = yaml.safe_load(yamlfile)
583+
assert isinstance(p, dict)
584+
585+
rng = np.random.default_rng(seed=43)
586+
for var, prob in p["qbi_qualification_probabilities"].items():
587+
cps[f"{var}_would_be_qualified"] = rng.random(len(person)) < prob
588+
573589

574590
def add_spm_variables(cps: h5py.File, spm_unit: DataFrame) -> None:
575591
SPM_RENAMES = dict(

policyengine_us_data/datasets/cps/extended_cps.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
"taxable_ira_distributions",
2727
"self_employment_income",
2828
"w2_wages_from_qualified_business",
29+
"unadjusted_basis_qualified_property",
30+
"business_is_sstb",
2931
"short_term_capital_gains",
3032
"qualified_dividend_income",
3133
"charitable_cash_donations",
@@ -69,6 +71,15 @@
6971
"unreported_payroll_tax",
7072
"recapture_of_investment_credit",
7173
"deductible_mortgage_interest",
74+
"qualified_reit_and_ptp_income",
75+
"qualified_bdc_income",
76+
"farm_operations_income",
77+
"estate_income_would_be_qualified",
78+
"farm_operations_income_would_be_qualified",
79+
"farm_rent_income_would_be_qualified",
80+
"partnership_s_corp_income_would_be_qualified",
81+
"rental_income_would_be_qualified",
82+
"self_employment_income_would_be_qualified",
7283
]
7384

7485

policyengine_us_data/datasets/puf/puf.py

Lines changed: 198 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,167 @@
1+
import os
2+
import yaml
3+
from importlib.resources import files
4+
15
from tqdm import tqdm
26
import numpy as np
37
import pandas as pd
48
from microdf import MicroDataFrame
9+
510
from policyengine_core.data import Dataset
611
from policyengine_us_data.storage import STORAGE_FOLDER
712
from policyengine_us_data.datasets.puf.uprate_puf import uprate_puf
813
from policyengine_us_data.datasets.puf.irs_puf import IRS_PUF_2015
914
from policyengine_us_data.utils.uprating import (
1015
create_policyengine_uprating_factors_table,
1116
)
12-
import os
17+
1318

1419
rng = np.random.default_rng(seed=64)
1520

21+
# Get Qualified Business Income simulation parameters ---
22+
yamlfilename = (
23+
files("policyengine_us_data") / "datasets" / "puf" / "qbi_assumptions.yaml"
24+
)
25+
with open(yamlfilename, "r", encoding="utf-8") as yamlfile:
26+
QBI_PARAMS = yaml.safe_load(yamlfile)
27+
assert isinstance(QBI_PARAMS, dict)
28+
29+
30+
# Helper functions ---
31+
def sample_bernoulli_lognormal(n, prob, log_mean, log_sigma, rng):
32+
"""Generate a Bernoulli-lognormal mixture."""
33+
positive = np.random.binomial(1, prob, size=n)
34+
amounts = np.where(
35+
positive,
36+
rng.lognormal(mean=log_mean, sigma=log_sigma, size=n),
37+
0.0,
38+
)
39+
return amounts
40+
41+
42+
def conditionally_sample_lognormal(flag, target_mean, log_sigma, rng):
43+
"""Generate a lognormal conditional on a binary flag."""
44+
mu = np.log(target_mean) - (log_sigma**2 / 2)
45+
return np.where(
46+
flag,
47+
rng.lognormal(
48+
mean=mu,
49+
sigma=log_sigma,
50+
),
51+
0.0,
52+
)
53+
54+
55+
def simulate_w2_and_ubia_from_puf(puf, *, seed=None, diagnostics=True):
56+
"""
57+
Simulate two Section 199A guard-rail quantities for every record
58+
- W-2 wages paid by the business
59+
- Unadjusted basis immediately after acquisition (UBIA) of property
60+
61+
Parameters
62+
----------
63+
puf : pandas.DataFrame
64+
Must contain the income columns created in your preprocessing block.
65+
seed : int, optional
66+
For reproducible random draws.
67+
diagnostics : bool, default True
68+
Print high-level checks after the simulation runs.
69+
70+
Returns
71+
-------
72+
w2_wages : 1-D NumPy array
73+
ubia : 1-D NumPy array
74+
"""
75+
rng = np.random.default_rng(seed)
76+
77+
# Extract Qualified Business Income simulation parameters
78+
qbi_probs = QBI_PARAMS["qbi_qualification_probabilities"]
79+
margin_params = QBI_PARAMS["profit_margin_distribution"]
80+
logit_params = QBI_PARAMS["has_employees_logit"]
81+
82+
labor_params = QBI_PARAMS["labor_ratio_distribution"]
83+
rental_labor = labor_params["rental"]
84+
non_rental_labor = labor_params["non_rental"]
85+
86+
rental_beta_a = rental_labor["beta_a"]
87+
rental_beta_b = rental_labor["beta_b"]
88+
rental_scale = rental_labor["scale"]
89+
90+
non_rental_beta_a = non_rental_labor["beta_a"]
91+
non_rental_beta_b = non_rental_labor["beta_b"]
92+
non_rental_scale = non_rental_labor["scale"]
93+
94+
depr_sigma = QBI_PARAMS["depreciation_proxy_sigma"]
95+
96+
ubia_params = QBI_PARAMS["ubia_simulation"]
97+
ubia_multiple_of_qbi = ubia_params["multiple_of_qbi"]
98+
ubia_sigma = ubia_params["sigma"]
99+
100+
# Estimate qualified business income
101+
qbi = sum(
102+
puf[income_type] * prob for income_type, prob in qbi_probs.items()
103+
).to_numpy()
104+
105+
# Simulate gross receipts by drawing a profit margin
106+
margins = (
107+
rng.beta(margin_params["beta_a"], margin_params["beta_b"], qbi.size)
108+
* margin_params["scale"]
109+
+ margin_params["shift"]
110+
)
111+
revenues = np.maximum(qbi, 0) / margins
112+
113+
logit = (
114+
logit_params["intercept"] + logit_params["slope_per_dollar"] * revenues
115+
)
116+
117+
# Set p = 0 when simulated receipts == 0 (no revenue means no payroll)
118+
pr_has_employees = np.where(
119+
revenues == 0.0, 0.0, 1.0 / (1.0 + np.exp(-logit))
120+
)
121+
has_employees = rng.binomial(1, pr_has_employees)
122+
123+
# Labor share simulation
124+
is_rental = puf["rental_income"].to_numpy() > 0
125+
126+
labor_ratios = np.where(
127+
is_rental,
128+
rng.beta(rental_beta_a, rental_beta_b, qbi.size) * rental_scale,
129+
rng.beta(non_rental_beta_a, non_rental_beta_b, qbi.size)
130+
* non_rental_scale,
131+
)
132+
133+
w2_wages = revenues * labor_ratios * has_employees
134+
135+
# A depreciation stand-in that scales with rents
136+
depreciation_proxy = conditionally_sample_lognormal(
137+
is_rental,
138+
puf["rental_income"],
139+
depr_sigma,
140+
rng,
141+
)
142+
143+
# UBIA simulation: lognormal, but only for capital-heavy records
144+
is_capital_intensive = is_rental | (depreciation_proxy > 0)
145+
146+
ubia = conditionally_sample_lognormal(
147+
is_capital_intensive,
148+
ubia_multiple_of_qbi * np.maximum(qbi, 0),
149+
ubia_sigma,
150+
rng,
151+
)
152+
153+
if diagnostics:
154+
share_qbi_pos = np.mean(qbi > 0)
155+
share_wages = np.mean((w2_wages > 0) & (qbi > 0))
156+
print(f"Share with QBI > 0: {share_qbi_pos:6.2%}")
157+
print(f"Among those, share with W-2 wages: {share_wages:6.2%}")
158+
if np.any(w2_wages > 0):
159+
print(f"Mean W-2 (if >0): ${np.mean(w2_wages[w2_wages>0]):,.0f}")
160+
if np.any(ubia > 0):
161+
print(f"Median UBIA (if >0): ${np.median(ubia[ubia>0]):,.0f}")
162+
163+
return w2_wages, ubia
164+
16165

17166
def impute_pension_contributions_to_puf(puf_df):
18167
from policyengine_us import Microsimulation
@@ -154,8 +303,8 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
154303
puf["educator_expense"] = puf.E03220
155304
puf["employment_income"] = puf.E00200
156305
puf["estate_income"] = puf.E26390 - puf.E26400
306+
# Schedule J, separate from QBI
157307
puf["farm_income"] = puf.T27800
158-
puf["farm_rent_income"] = puf.E27200
159308
puf["health_savings_account_ald"] = puf.E03290
160309
puf["interest_deduction"] = puf.E19200
161310
puf["long_term_capital_gains"] = puf.P23250
@@ -170,11 +319,21 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
170319
# that can be deducted under the miscellaneous deduction.
171320
puf["unreimbursed_business_employee_expenses"] = puf.E20400
172321
puf["non_qualified_dividend_income"] = puf.E00600 - puf.E00650
173-
puf["partnership_s_corp_income"] = puf.E26270
174322
puf["qualified_dividend_income"] = puf.E00650
175323
puf["qualified_tuition_expenses"] = puf.E03230
176324
puf["real_estate_taxes"] = puf.E18500
325+
# Schedule E rent and royalty
177326
puf["rental_income"] = puf.E25850 - puf.E25860
327+
# Schedule E active S-Corp income
328+
s_corp_income = puf.E26190 - puf.E26180
329+
# Schedule E active partnership income
330+
partnership_income = puf.E25980 - puf.E25960
331+
puf["partnership_s_corp_income"] = s_corp_income + partnership_income
332+
# Schedule F active farming operations
333+
puf["farm_operations_income"] = puf.E02100
334+
# Schedule E farm rental income
335+
puf["farm_rent_income"] = puf.E27200
336+
# Schedule C Sole Proprietorship
178337
puf["self_employment_income"] = puf.E00900
179338
puf["self_employed_health_insurance_ald"] = puf.E03270
180339
puf["self_employed_pension_contribution_ald"] = puf.E03300
@@ -211,15 +370,38 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
211370
# Ignore f2441 (AMT form attached)
212371
# Ignore cmbtp (estimate of AMT income not in AGI)
213372
# Ignore k1bx14s and k1bx14p (partner self-employment income included in partnership and S-corp income)
214-
qbi = np.maximum(0, puf.E00900 + puf.E26270 + puf.E02100 + puf.E27200)
215-
# 10.1% passthrough rate for W2 wages hits the JCT tax expenditure target for QBID
216-
# https://gist.github.com/nikhilwoodruff/262c80b8b17935d6fb8544647143b854
217-
W2_WAGES_SCALE = 0.101
218-
puf["w2_wages_from_qualified_business"] = qbi * W2_WAGES_SCALE
219373

220-
# Remove aggregate records
221-
puf = puf[puf.MARS != 0]
374+
# --- Qualified Business Income Deduction (QBID) simulation ---
375+
w2, ubia = simulate_w2_and_ubia_from_puf(puf, seed=42)
376+
puf["w2_wages_from_qualified_business"] = w2
377+
puf["unadjusted_basis_qualified_property"] = ubia
222378

379+
puf_qbi_sources_for_sstb = puf[QBI_PARAMS["sstb_prob_map_by_name"].keys()]
380+
largest_qbi_source_name = puf_qbi_sources_for_sstb.idxmax(axis=1)
381+
382+
pr_sstb = largest_qbi_source_name.map(
383+
QBI_PARAMS["sstb_prob_map_by_name"]
384+
).fillna(0.0)
385+
puf["business_is_sstb"] = np.random.binomial(n=1, p=pr_sstb)
386+
387+
reit_params = QBI_PARAMS["reit_ptp_income_distribution"]
388+
p_reit_ptp = reit_params["probability_of_receiving"]
389+
mu_reit_ptp = reit_params["log_normal_mu"]
390+
sigma_reit_ptp = reit_params["log_normal_sigma"]
391+
392+
puf["qualified_reit_and_ptp_income"] = sample_bernoulli_lognormal(
393+
len(puf), p_reit_ptp, mu_reit_ptp, sigma_reit_ptp, rng
394+
)
395+
396+
bdc_params = QBI_PARAMS["bdc_income_distribution"]
397+
p_bdc = bdc_params["probability_of_receiving"]
398+
mu_bdc = bdc_params["log_normal_mu"]
399+
sigma_bdc = bdc_params["log_normal_sigma"]
400+
401+
puf["qualified_bdc_income"] = sample_bernoulli_lognormal(
402+
len(puf), p_bdc, mu_bdc, sigma_bdc, rng
403+
)
404+
# -------- End of Qualified Business Income Deduction (QBID) -------
223405
puf["filing_status"] = puf.MARS.map(
224406
{
225407
1: "SINGLE",
@@ -248,6 +430,7 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
248430
"educator_expense",
249431
"employment_income",
250432
"estate_income",
433+
"farm_operations_income",
251434
"farm_income",
252435
"farm_rent_income",
253436
"health_savings_account_ald",
@@ -257,7 +440,6 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
257440
"unreimbursed_business_employee_expenses",
258441
"non_qualified_dividend_income",
259442
"non_sch_d_capital_gains",
260-
"partnership_s_corp_income",
261443
"qualified_dividend_income",
262444
"qualified_tuition_expenses",
263445
"real_estate_taxes",
@@ -293,7 +475,12 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
293475
"unreported_payroll_tax",
294476
"pre_tax_contributions",
295477
"w2_wages_from_qualified_business",
478+
"unadjusted_basis_qualified_property",
479+
"business_is_sstb",
296480
"deductible_mortgage_interest",
481+
"partnership_s_corp_income",
482+
"qualified_reit_and_ptp_income",
483+
"qualified_bdc_income",
297484
]
298485

299486

0 commit comments

Comments
 (0)