diff --git a/.github/workflows/reusable_test.yaml b/.github/workflows/reusable_test.yaml index 401efd17..99cfff16 100644 --- a/.github/workflows/reusable_test.yaml +++ b/.github/workflows/reusable_test.yaml @@ -45,6 +45,11 @@ jobs: with: python-version: '3.13' + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: '24' + - uses: "google-github-actions/auth@v2" if: inputs.upload_data with: @@ -94,5 +99,5 @@ jobs: uses: JamesIves/github-pages-deploy-action@v4 with: branch: gh-pages - folder: docs/_build/site + folder: docs/_build/html clean: true diff --git a/Makefile b/Makefile index 994173d1..78d0904d 100644 --- a/Makefile +++ b/Makefile @@ -33,8 +33,8 @@ documentation: rm -rf _build .jupyter_cache && \ rm -f _toc.yml && \ myst clean && \ - timeout 10 myst build --html || true - cd docs && test -d _build/site && touch _build/site/.nojekyll || true + myst build --html + cd docs && test -d _build/html && touch _build/html/.nojekyll || true documentation-build: cd docs && \ diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29b..bd0c65df 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,11 @@ +- bump: patch + changes: + fixed: + - GitHub Pages documentation deployment (was deploying wrong directory causing blank pages) + - Removed timeout and error suppression from documentation build + added: + - Node.js 24 LTS setup to CI workflow for MyST builds + - H6 Social Security reform calibration for long-term projections (phases out OASDI taxation 2045-2054) + - H6 threshold crossover handling when OASDI thresholds exceed HI thresholds + - start_year parameter to run_household_projection.py CLI + - docs/README.md documenting MyST build output pitfall diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 00000000..d6ab94c3 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,46 @@ +# Documentation + +This project uses [MyST Markdown](https://mystmd.org/) for documentation. 
+ +## Building Locally + +### Requirements +- Python 3.13+ with dev dependencies: `uv pip install -e .[dev] --system` +- Node.js 20+ (required by MyST) + +### Commands +```bash +make documentation # Build static HTML files +make documentation-serve # Serve locally on http://localhost:8080 +``` + +## Important: MyST Build Outputs + +**MyST creates two different outputs - DO NOT confuse them:** + +- `_build/html/` - **Static HTML files (use for GitHub Pages deployment)** +- `_build/site/` - Dynamic content for `myst start` development server only + +**GitHub Pages must deploy `_build/html/`**, not `_build/site/`. The `_build/site/` directory contains JSON files for MyST's development server and will result in a blank page on GitHub Pages. + +## GitHub Pages Deployment + +- Site URL: https://policyengine.github.io/policyengine-us-data/ +- Deployed from: `docs/_build/html/` directory +- Propagation time: 5-10 minutes after push to gh-pages branch +- Workflow: `.github/workflows/code_changes.yaml` (on main branch only) + +## Troubleshooting + +**Blank page after deployment:** +- Check that workflow deploys `folder: docs/_build/html` (not `_build/site`) +- Wait 5-10 minutes for GitHub Pages propagation +- Hard refresh browser (Ctrl+Shift+R / Cmd+Shift+R) + +**Build fails in CI:** +- Ensure Node.js setup step exists in workflow (MyST requires Node.js) +- Never add timeouts or `|| true` to build commands - they mask failures + +**Missing index.html:** +- MyST auto-generates index.html in `_build/html/` +- Do not create manual index.html in docs/ diff --git a/docs/abstract.md b/docs/abstract.md index 6741cd2d..b6963c11 100644 --- a/docs/abstract.md +++ b/docs/abstract.md @@ -6,15 +6,6 @@ quantile regression forests to impute 67 tax variables from the PUF onto CPS rec preserving distributional characteristics while maintaining household composition and member relationships. 
The imputation process alone does not guarantee consistency with official statistics, necessitating a reweighting step to align the combined dataset with known -population totals and administrative benchmarks. We apply a reweighting algorithm that - calibrates the dataset to 2,813 targets from -the IRS Statistics of Income, Census population projections, Congressional Budget -Office benefit program estimates, Treasury -expenditure data, Joint Committee on Taxation tax expenditure estimates, healthcare -spending patterns, and other benefit program costs. The reweighting employs dropout-regularized - gradient descent optimization -to ensure consistency with administrative benchmarks. Validation shows the enhanced dataset -reduces error in key tax components by [TO BE CALCULATED]% relative to the baseline CPS. -The dataset maintains the CPS's demographic detail and geographic granularity while +population totals and administrative benchmarks. We apply a reweighting algorithm that calibrates the dataset to 2,813 targets from the IRS Statistics of Income, Census population projections, Congressional Budget Office benefit program estimates, Treasury expenditure data, Joint Committee on Taxation tax expenditure estimates, healthcare spending patterns, and other benefit program costs. The reweighting employs dropout-regularized gradient descent optimization to ensure consistency with administrative benchmarks. The dataset maintains the CPS's demographic detail and geographic granularity while incorporating tax reporting data from administrative sources. We release the enhanced dataset, source code, and documentation to support policy analysis. 
diff --git a/docs/appendix.md b/docs/appendix.md index 2d9d0062..7bc41f73 100644 --- a/docs/appendix.md +++ b/docs/appendix.md @@ -46,4 +46,95 @@ for iteration in range(5000): ### Table A1: Complete List of Imputed Variables -[TO BE GENERATED - Complete list of 72 imputed variables from PUF organized by category] \ No newline at end of file +#### Variables Imputed from IRS Public Use File (67 variables) + +**Income Variables:** +- employment_income +- partnership_s_corp_income +- social_security +- taxable_pension_income +- tax_exempt_pension_income +- long_term_capital_gains +- short_term_capital_gains +- taxable_ira_distributions +- self_employment_income +- qualified_dividend_income +- non_qualified_dividend_income +- rental_income +- taxable_unemployment_compensation +- taxable_interest_income +- tax_exempt_interest_income +- estate_income +- miscellaneous_income +- farm_income +- alimony_income +- farm_rent_income +- non_sch_d_capital_gains +- long_term_capital_gains_on_collectibles +- unrecaptured_section_1250_gain +- salt_refund_income + +**Deductions and Adjustments:** +- interest_deduction +- unreimbursed_business_employee_expenses +- pre_tax_contributions +- charitable_cash_donations +- self_employed_pension_contribution_ald +- domestic_production_ald +- self_employed_health_insurance_ald +- charitable_non_cash_donations +- alimony_expense +- health_savings_account_ald +- student_loan_interest +- investment_income_elected_form_4952 +- early_withdrawal_penalty +- educator_expense +- deductible_mortgage_interest + +**Tax Credits:** +- cdcc_relevant_expenses +- foreign_tax_credit +- american_opportunity_credit +- general_business_credit +- energy_efficient_home_improvement_credit +- amt_foreign_tax_credit +- excess_withheld_payroll_tax +- savers_credit +- prior_year_minimum_tax_credit +- other_credits + +**Qualified Business Income Variables:** +- w2_wages_from_qualified_business +- unadjusted_basis_qualified_property +- business_is_sstb +- 
qualified_reit_and_ptp_income +- qualified_bdc_income +- farm_operations_income +- estate_income_would_be_qualified +- farm_operations_income_would_be_qualified +- farm_rent_income_would_be_qualified +- partnership_s_corp_income_would_be_qualified +- rental_income_would_be_qualified +- self_employment_income_would_be_qualified + +**Other Tax Variables:** +- traditional_ira_contributions +- qualified_tuition_expenses +- casualty_loss +- unreported_payroll_tax +- recapture_of_investment_credit + +#### Variables Imputed from Survey of Income and Program Participation (1 variable) + +- tip_income + +#### Variables Imputed from Survey of Consumer Finances (3 variables) + +- networth +- auto_loan_balance +- auto_loan_interest + +#### Variables Imputed from American Community Survey (2 variables) + +- rent +- real_estate_taxes \ No newline at end of file diff --git a/docs/conclusion.md b/docs/conclusion.md index 7bd2c5a4..519c9f8e 100644 --- a/docs/conclusion.md +++ b/docs/conclusion.md @@ -18,7 +18,7 @@ Our work makes several key contributions: The validation results demonstrate that combining survey and administrative data through principled statistical methods can achieve: - Improved income distribution representation -- Better alignment with program participation totals +- Better alignment with program participation totals - Maintained demographic and geographic detail - Suitable accuracy for policy simulation diff --git a/docs/discussion.md b/docs/discussion.md index 1bbda6c7..476a813f 100644 --- a/docs/discussion.md +++ b/docs/discussion.md @@ -8,7 +8,7 @@ We examine the strengths, limitations, and potential applications of the Enhance The Enhanced CPS uniquely combines: - Demographic detail from the CPS including state identifiers -- Tax precision from IRS administrative data +- Tax precision from IRS administrative data - Calibration to contemporary official statistics - Open-source availability for research use @@ -26,7 +26,7 @@ The large-scale calibration to 
2,813 targets ensures consistency with administra ### Practical Advantages -For policy analysis, the dataset offers state-level geographic detail enabling subnational analysis, household structure for distributional studies, tax detail for revenue estimation, program participation for benefit analysis, and recent data calibrated to current totals. +For policy analysis, the dataset offers several key features: state-level geographic detail for subnational analysis, household structure for distributional studies, tax detail for revenue estimation, program participation for benefit analysis, and calibration to current administrative totals. ## Limitations diff --git a/docs/introduction.md b/docs/introduction.md index ddcb5e56..78d9591d 100644 --- a/docs/introduction.md +++ b/docs/introduction.md @@ -1,10 +1,10 @@ # Introduction -Microsimulation models require high-quality microdata that accurately represents both demographic characteristics and economic outcomes. The ideal dataset would combine the demographic richness and household structure of surveys with the income precision of administrative tax records. However, publicly available datasets typically excel in one dimension while lacking in the other. +Microsimulation models require high-quality microdata that accurately represent demographic characteristics and economic outcomes. The ideal dataset would combine the demographic richness and household structure of surveys with the income precision of administrative tax records. However, publicly available datasets typically excel in one dimension while lacking in the other. The Current Population Survey (CPS) Annual Social and Economic Supplement provides detailed household demographics, family relationships, and program participation data for a representative sample of US households. However, it suffers from well-documented income underreporting, particularly at the top of the distribution. 
The IRS Public Use File (PUF) contains accurate tax return information but lacks household structure, demographic detail, and state identifiers needed for comprehensive policy analysis. -This paper presents a methodology for creating an Enhanced CPS dataset that combines the strengths of both sources. Through an enhancement process—imputation followed by reweighting—we create a dataset suitable for analyzing both tax and transfer policies at federal and state levels. +This paper presents a methodology for creating an Enhanced CPS dataset that combines the strengths of both sources. Through a two-stage enhancement process (imputation followed by reweighting), we create a dataset suitable for analyzing both tax and transfer policies at federal and state levels. ## Related Work @@ -24,4 +24,4 @@ Our empirical contribution involves creating and validating a publicly available From a practical perspective, we provide open-source tools and comprehensive documentation that enable researchers to apply these methods, modify the approach, or build upon our work. This transparency contrasts with existing proprietary models and supports reproducible research. Government agencies could use our framework to enhance their own microsimulation capabilities, while academic researchers gain access to data suitable for analyzing distributional impacts of tax and transfer policies. The modular design allows incremental improvements as new data sources become available. -We organize the remainder of this paper as follows. Section 2 describes our data sources including the primary datasets and calibration targets. Section 3 details the enhancement methodology including both the imputation and reweighting stages. Section 4 presents validation results comparing performance across datasets. Section 5 discusses limitations, applications, and future directions. Section 6 concludes with implications for policy analysis. \ No newline at end of file +We organize the remainder of this paper as follows. 
Section 2 describes our data sources including the primary datasets and calibration targets. Section 3 details the enhancement methodology including both the imputation and reweighting stages. Section 4 presents validation results comparing performance across datasets. Section 5 discusses limitations, applications, and future directions. Section 6 concludes with implications for policy analysis. diff --git a/docs/pwbm_ss_comparison_2025_2100.ipynb b/docs/long_term_projections.ipynb similarity index 96% rename from docs/pwbm_ss_comparison_2025_2100.ipynb rename to docs/long_term_projections.ipynb index 3296e4cb..10b07b72 100644 --- a/docs/pwbm_ss_comparison_2025_2100.ipynb +++ b/docs/long_term_projections.ipynb @@ -3,10 +3,17 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "# Comparison to Penn Wharton Budget Model: Eliminating Tax on Social Security 2025-2100\n", - "## Integrating Economic Uprating with Demographic Reweighting" - ] + "source": "# Long Term Projections\n## Integrating Economic Uprating with Demographic Reweighting" + }, + { + "cell_type": "markdown", + "source": "## Executive Summary\n\nThis document outlines an innovative approach for projecting federal income tax revenue through 2100 that uniquely combines sophisticated economic microsimulation with demographic reweighting. 
By harmonizing PolicyEngine's state-of-the-art tax modeling with Social Security Administration demographic projections, we can isolate and quantify the fiscal impact of population aging while preserving the full complexity of the tax code.", + "metadata": {} + }, + { + "cell_type": "markdown", + "source": "## The Challenge\n\nProjecting tax revenue over a 75-year horizon requires simultaneously modeling two distinct but interrelated dynamics:\n\n**Economic Evolution**: How incomes, prices, and tax parameters change over time\n- Wage growth and income distribution shifts\n- Inflation affecting brackets and deductions\n- Legislative changes and indexing rules\n- Behavioral responses to tax policy\n\n**Demographic Transformation**: How the population structure evolves\n- Baby boom generation aging through retirement\n- Declining birth rates reducing working-age population\n- Increasing longevity extending retirement duration\n- Shifting household composition patterns\n\nTraditional approaches typically sacrifice either economic sophistication (using simplified tax calculations) or demographic realism (holding age distributions constant). Our methodology preserves both.", + "metadata": {} }, { "cell_type": "markdown", @@ -176,17 +183,6 @@ "- `--save-h5`: Save year-specific .h5 files to `./projected_datasets/` directory" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---\n", - "\n", - "## Executive Summary\n", - "\n", - "This document outlines an innovative approach for projecting federal income tax revenue through 2100 that uniquely combines sophisticated economic microsimulation with demographic reweighting. By harmonizing PolicyEngine's state-of-the-art tax modeling with Social Security Administration demographic projections, we can isolate and quantify the fiscal impact of population aging while preserving the full complexity of the tax code." 
- ] - }, { "cell_type": "markdown", "metadata": {}, @@ -210,13 +206,6 @@ "Traditional approaches typically sacrifice either economic sophistication (using simplified tax calculations) or demographic realism (holding age distributions constant). Our methodology preserves both." ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Loading and Exploring the Data" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -1023,4 +1012,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/docs/methodology.md b/docs/methodology.md index 77ad6317..603acda6 100644 --- a/docs/methodology.md +++ b/docs/methodology.md @@ -147,7 +147,20 @@ From the American Community Survey (ACS), we impute property taxes for homeowner ### Example: Tip Income Imputation -To illustrate how QRF preserves conditional distributions, consider tip income imputation. The training data from SIPP contains workers with employment income and tip income. For a worker with predictors of $30,000 employment income, age 25, and no children, QRF finds that similar workers in SIPP have a conditional distribution ranging from $0 at the 10th percentile (no tips) to $2,000 at the median, $8,000 at the 90th percentile, and $15,000 at the 99th percentile. If the random quantile drawn is 0.85, the imputed tip income would be approximately $6,500. This approach ensures that some similar workers receive no tips while others receive substantial tips, preserving realistic variation. +To illustrate how QRF preserves conditional distributions, consider tip income imputation. The training data from SIPP contains workers with employment income and tip income. 
+ +For a worker with the following characteristics: +- Employment income: \$30,000 +- Age: 25 +- Number of children: 0 + +QRF finds that similar workers in SIPP have a conditional distribution of tip income: +- 10th percentile: \$0 (no tips) +- 50th percentile: \$2,000 +- 90th percentile: \$8,000 +- 99th percentile: \$15,000 + +If the random quantile drawn is 0.85, the imputed tip income would be approximately \$6,500. This approach ensures that some similar workers receive no tips while others receive substantial tips, preserving realistic variation. ## Stage 2: Reweighting @@ -185,7 +198,7 @@ The calibration process incorporates tax and benefit calculations through Policy ### Convergence -The optimization converges within iterations. We monitor convergence through the loss value trajectory, weight stability across iterations, and target achievement rates. +The optimization converges within 500 epochs. We monitor convergence through the loss value trajectory, weight stability across iterations, and target achievement rates. 
## Validation diff --git a/docs/myst.yml b/docs/myst.yml index 37ddbbe5..a39b3cfb 100644 --- a/docs/myst.yml +++ b/docs/myst.yml @@ -24,7 +24,7 @@ project: - file: background.md - file: data.md - file: methodology.md - - file: pwbm_ss_comparison_2025_2100.ipynb + - file: long_term_projections.ipynb - file: discussion.md - file: conclusion.md - file: appendix.md diff --git a/policyengine_us_data/datasets/cps/long_term/calibration.py b/policyengine_us_data/datasets/cps/long_term/calibration.py index 6a8d293e..5019baab 100644 --- a/policyengine_us_data/datasets/cps/long_term/calibration.py +++ b/policyengine_us_data/datasets/cps/long_term/calibration.py @@ -83,6 +83,8 @@ def calibrate_greg( ss_target=None, payroll_values=None, payroll_target=None, + h6_income_values=None, + h6_revenue_target=None, n_ages=86, ): """ @@ -97,6 +99,8 @@ def calibrate_greg( ss_target: Optional Social Security target total payroll_values: Optional taxable payroll values per household payroll_target: Optional taxable payroll target total + h6_income_values: Optional H6 reform income values per household + h6_revenue_target: Optional H6 reform total revenue impact target n_ages: Number of age groups Returns: @@ -108,9 +112,13 @@ def calibrate_greg( controls[f"age_{age_idx}"] = y_target[age_idx] # Build auxiliary variables dataframe if any continuous constraints are provided - if (ss_values is not None and ss_target is not None) or ( - payroll_values is not None and payroll_target is not None - ): + needs_aux_df = ( + (ss_values is not None and ss_target is not None) + or (payroll_values is not None and payroll_target is not None) + or (h6_income_values is not None and h6_revenue_target is not None) + ) + + if needs_aux_df: age_cols = {f"age_{i}": X[:, i] for i in range(n_ages)} aux_df = pd.DataFrame(age_cols) @@ -122,6 +130,11 @@ def calibrate_greg( aux_df["payroll_total"] = payroll_values controls["payroll_total"] = payroll_target + # H6 reform revenue impact as a simple linear constraint + 
if h6_income_values is not None and h6_revenue_target is not None: + aux_df["h6_revenue"] = h6_income_values + controls["h6_revenue"] = h6_revenue_target + aux_vars = aux_df else: aux_vars = X @@ -145,6 +158,8 @@ def calibrate_weights( ss_target=None, payroll_values=None, payroll_target=None, + h6_income_values=None, + h6_revenue_target=None, n_ages=86, max_iters=100, tol=1e-6, @@ -163,6 +178,8 @@ def calibrate_weights( ss_target: Optional SS target (for GREG with SS) payroll_values: Optional payroll values (for GREG with payroll) payroll_target: Optional payroll target (for GREG with payroll) + h6_income_values: Optional H6 reform income values per household + h6_revenue_target: Optional H6 reform total revenue impact target n_ages: Number of age groups max_iters: Max iterations for IPF tol: Convergence tolerance for IPF @@ -185,6 +202,8 @@ def calibrate_weights( ss_target, payroll_values, payroll_target, + h6_income_values, + h6_revenue_target, n_ages, ) except Exception as e: diff --git a/policyengine_us_data/datasets/cps/long_term/run_household_projection.py b/policyengine_us_data/datasets/cps/long_term/run_household_projection.py index f47d9b74..9dbb7f2d 100644 --- a/policyengine_us_data/datasets/cps/long_term/run_household_projection.py +++ b/policyengine_us_data/datasets/cps/long_term/run_household_projection.py @@ -3,16 +3,19 @@ Usage: - python run_household_projection.py [END_YEAR] [--greg] [--use-ss] [--use-payroll] [--save-h5] + python run_household_projection.py [START_YEAR] [END_YEAR] [--greg] [--use-ss] [--use-payroll] [--use-h6-reform] [--save-h5] + START_YEAR: Optional starting year (default: 2025) END_YEAR: Optional ending year (default: 2035) --greg: Use GREG calibration instead of IPF (optional) --use-ss: Include Social Security benefit totals as calibration target (requires --greg) --use-payroll: Include taxable payroll totals as calibration target (requires --greg) + --use-h6-reform: Include H6 reform income impact ratio as calibration target 
(requires --greg) --save-h5: Save year-specific .h5 files with calibrated weights to ./projected_datasets/ Examples: - python run_household_projection.py 2100 --greg --use-ss --use-payroll --save-h5 + python run_household_projection.py 2045 2045 --greg --use-ss # single year + python run_household_projection.py 2025 2100 --greg --use-ss --use-payroll --use-h6-reform --save-h5 """ import sys @@ -36,6 +39,171 @@ ) +def create_h6_reform(): + """ + Implements Proposal H6: + 1. Phase out OASDI taxation (Tier 1) from 2045-2053 by raising thresholds. + 2. Eliminate OASDI taxation fully in 2054+ (set Tier 1 rate to 0%). + 3. HOLD HARMLESS: Maintain HI taxation (Tier 2) revenue at current law levels throughout. + + CRITICAL: Handles the "Threshold Crossover" problem. + As OASDI thresholds rise above HI thresholds ($34k/$44k), we must + swap the parameter definitions to prevent the engine from breaking. + """ + + reform_payload = { + # Thresholds + "gov.irs.social_security.taxability.threshold.base.main.SINGLE": {}, + "gov.irs.social_security.taxability.threshold.base.main.JOINT": {}, + "gov.irs.social_security.taxability.threshold.base.main.HEAD_OF_HOUSEHOLD": {}, + "gov.irs.social_security.taxability.threshold.base.main.SURVIVING_SPOUSE": {}, + "gov.irs.social_security.taxability.threshold.base.main.SEPARATE": {}, + "gov.irs.social_security.taxability.threshold.adjusted_base.main.SINGLE": {}, + "gov.irs.social_security.taxability.threshold.adjusted_base.main.JOINT": {}, + "gov.irs.social_security.taxability.threshold.adjusted_base.main.HEAD_OF_HOUSEHOLD": {}, + "gov.irs.social_security.taxability.threshold.adjusted_base.main.SURVIVING_SPOUSE": {}, + "gov.irs.social_security.taxability.threshold.adjusted_base.main.SEPARATE": {}, + # Rates - Base (Tier 1) + "gov.irs.social_security.taxability.rate.base.benefit_cap": {}, + "gov.irs.social_security.taxability.rate.base.excess": {}, + # Rates - Additional (Tier 2 - HI) + 
"gov.irs.social_security.taxability.rate.additional.benefit_cap": {}, + "gov.irs.social_security.taxability.rate.additional.excess": {}, + } + + # --- CONSTANTS: CURRENT LAW HI THRESHOLDS (FROZEN) --- + # We must preserve these specific triggers to protect the HI Trust Fund + HI_SINGLE = 34_000 + HI_JOINT = 44_000 + + # --- PHASE 1: THE TRANSITION (2045-2053) --- + for year in range(2045, 2054): + period = f"{year}-01-01" + i = year - 2045 + + # 1. Calculate the Target OASDI Thresholds (Rising) + # (a) 2045 = $32,500 ... (i) 2053 = $92,500 + oasdi_target_single = 32_500 + (7_500 * i) + oasdi_target_joint = 65_000 + (15_000 * i) + + # 2. Handle Threshold Crossover + # OASDI thresholds rise above HI thresholds during phase-out. + # We must swap parameters: put lower threshold in 'base' slot. + + # --- SET RATES FOR TRANSITION (2045-2053) --- + # Joint filers cross immediately in 2045 ($65k OASDI > $44k HI). + # Single filers cross in 2046 ($40k OASDI > $34k HI). + # + # PolicyEngine forces one global rate structure per year. + # We choose swapped rates (0.35/0.85) for ALL years to minimize error: + # + # Trade-off in 2045: + # - Single filers: $225 undertax (15% on $1.5k range) ✓ acceptable + # - Joint filers: Would be $3,150 overtax with default rates ✗ unacceptable + # + # The swapped rate error is 14x smaller and aligns with tax-cutting intent. 
+ + # Tier 1 (Base): HI ONLY (35%) + reform_payload[ + "gov.irs.social_security.taxability.rate.base.benefit_cap" + ][period] = 0.35 + reform_payload["gov.irs.social_security.taxability.rate.base.excess"][ + period + ] = 0.35 + + # Tier 2 (Additional): HI + OASDI Combined (85%) + reform_payload[ + "gov.irs.social_security.taxability.rate.additional.benefit_cap" + ][period] = 0.85 + reform_payload[ + "gov.irs.social_security.taxability.rate.additional.excess" + ][period] = 0.85 + + # --- SET THRESHOLDS (MIN/MAX SWAP) --- + # Always put the smaller number in 'base' and larger in 'adjusted_base' + + # Single + reform_payload[ + "gov.irs.social_security.taxability.threshold.base.main.SINGLE" + ][period] = min(oasdi_target_single, HI_SINGLE) + reform_payload[ + "gov.irs.social_security.taxability.threshold.adjusted_base.main.SINGLE" + ][period] = max(oasdi_target_single, HI_SINGLE) + + # Joint + reform_payload[ + "gov.irs.social_security.taxability.threshold.base.main.JOINT" + ][period] = min(oasdi_target_joint, HI_JOINT) + reform_payload[ + "gov.irs.social_security.taxability.threshold.adjusted_base.main.JOINT" + ][period] = max(oasdi_target_joint, HI_JOINT) + + # Map other statuses (Head/Surviving Spouse -> Single logic, Separate -> Single logic usually) + # Note: Separate is usually 0, but for H6 strictness we map to Single logic here + for status in ["HEAD_OF_HOUSEHOLD", "SURVIVING_SPOUSE", "SEPARATE"]: + reform_payload[ + f"gov.irs.social_security.taxability.threshold.base.main.{status}" + ][period] = min(oasdi_target_single, HI_SINGLE) + reform_payload[ + f"gov.irs.social_security.taxability.threshold.adjusted_base.main.{status}" + ][period] = max(oasdi_target_single, HI_SINGLE) + + # --- PHASE 2: ELIMINATION (2054+) --- + # OASDI is gone. We only collect HI. + # Logic: "Base" becomes the HI tier ($34k). Rate is 0.35. + # "Adjusted" becomes irrelevant (set high or rate to same). + + elim_period = "2054-01-01.2100-12-31" + + # 1. 
Set Thresholds to "HI Only" mode + # Base = $34k / $44k + reform_payload[ + "gov.irs.social_security.taxability.threshold.base.main.SINGLE" + ][elim_period] = HI_SINGLE + reform_payload[ + "gov.irs.social_security.taxability.threshold.base.main.JOINT" + ][elim_period] = HI_JOINT + + # Adjusted = Infinity (Disable the second tier effectively) + reform_payload[ + "gov.irs.social_security.taxability.threshold.adjusted_base.main.SINGLE" + ][elim_period] = 9_999_999 + reform_payload[ + "gov.irs.social_security.taxability.threshold.adjusted_base.main.JOINT" + ][elim_period] = 9_999_999 + + # Map others + for status in ["HEAD_OF_HOUSEHOLD", "SURVIVING_SPOUSE", "SEPARATE"]: + reform_payload[ + f"gov.irs.social_security.taxability.threshold.base.main.{status}" + ][elim_period] = HI_SINGLE + reform_payload[ + f"gov.irs.social_security.taxability.threshold.adjusted_base.main.{status}" + ][elim_period] = 9_999_999 + + # 2. Set Rates for HI Only Revenue + # Tier 1 (Now the ONLY tier) = 35% (HI Share) + reform_payload["gov.irs.social_security.taxability.rate.base.benefit_cap"][ + elim_period + ] = 0.35 + reform_payload["gov.irs.social_security.taxability.rate.base.excess"][ + elim_period + ] = 0.35 + + # Tier 2 (Disabled via threshold; rate set equal to Tier 1's 35%, not zeroed, for safety) + reform_payload[ + "gov.irs.social_security.taxability.rate.additional.benefit_cap" + ][elim_period] = 0.35 + reform_payload[ + "gov.irs.social_security.taxability.rate.additional.excess" + ][elim_period] = 0.35 + + # Create the Reform Object + from policyengine_core.reforms import Reform + + return Reform.from_dict(reform_payload, country_id="us") + + # ========================================================================= # DATASET CONFIGURATION # ========================================================================= @@ -52,7 +220,6 @@ } SELECTED_DATASET = "enhanced_cps_2024" -START_YEAR = 2025 # Load selected dataset configuration BASE_DATASET_PATH = DATASET_OPTIONS[SELECTED_DATASET]["path"] @@ -79,11 +246,22 @@ 
) USE_GREG = True +USE_H6_REFORM = "--use-h6-reform" in sys.argv +if USE_H6_REFORM: + sys.argv.remove("--use-h6-reform") + if not USE_GREG: + print( + "Warning: --use-h6-reform requires --greg, enabling GREG automatically" + ) + USE_GREG = True + from ssa_data import load_h6_income_rate_change + SAVE_H5 = "--save-h5" in sys.argv if SAVE_H5: sys.argv.remove("--save-h5") -END_YEAR = int(sys.argv[1]) if len(sys.argv) > 1 else 2035 +START_YEAR = int(sys.argv[1]) if len(sys.argv) > 1 else 2025 +END_YEAR = int(sys.argv[2]) if len(sys.argv) > 2 else 2035 if USE_GREG: from samplics.weighting import SampleWeight @@ -106,6 +284,8 @@ print(f" Including Social Security benefits constraint: Yes") if USE_PAYROLL: print(f" Including taxable payroll constraint: Yes") +if USE_H6_REFORM: + print(f" Including H6 reform income impact constraint: Yes") if SAVE_H5: print(f" Saving year-specific .h5 files: Yes (to {OUTPUT_DIR}/)") os.makedirs(OUTPUT_DIR, exist_ok=True) @@ -122,7 +302,9 @@ print("STEP 1: DEMOGRAPHIC PROJECTIONS") print("=" * 70) -target_matrix = load_ssa_age_projections(end_year=END_YEAR) +target_matrix = load_ssa_age_projections( + start_year=START_YEAR, end_year=END_YEAR +) n_years = target_matrix.shape[1] n_ages = target_matrix.shape[0] @@ -238,6 +420,50 @@ f" [DEBUG {year}] Payroll baseline: ${payroll_baseline/1e9:.1f}B, target: ${payroll_target/1e9:.1f}B" ) + h6_income_values = None + h6_revenue_target = None + if USE_H6_REFORM: + # Load target ratio from CSV + h6_target_ratio = load_h6_income_rate_change(year) + + # Only calculate H6 reform impacts if the target ratio is non-zero + # (Reform has no effect before 2045, so skip computation for efficiency) + if h6_target_ratio != 0: + # Create and apply H6 reform + h6_reform = create_h6_reform() + reform_sim = Microsimulation( + dataset=BASE_DATASET_PATH, reform=h6_reform + ) + + # Calculate reform income tax + income_tax_reform_hh = reform_sim.calculate( + "income_tax", period=year, map_to="household" + ) + 
income_tax_reform = income_tax_reform_hh.values + + # Revenue impact per household + h6_income_values = income_tax_reform - income_tax_values + + # Calculate H6 revenue target: ratio × payroll target + # This converts the ratio constraint to an absolute revenue constraint + payroll_target_year = load_taxable_payroll_projections(year) + h6_revenue_target = h6_target_ratio * payroll_target_year + + # Debug output for key years + if year in display_years: + h6_impact_baseline = np.sum( + h6_income_values * baseline_weights + ) + print( + f" [DEBUG {year}] H6 baseline revenue: ${h6_impact_baseline/1e9:.3f}B, target: ${h6_revenue_target/1e9:.3f}B" + ) + print( + f" [DEBUG {year}] H6 target ratio: {h6_target_ratio:.4f} × payroll ${payroll_target_year/1e9:.1f}B" + ) + + del reform_sim + gc.collect() + y_target = target_matrix[:, year_idx] w_new, iterations = calibrate_weights( @@ -250,13 +476,15 @@ ss_target=ss_target, payroll_values=payroll_values, payroll_target=payroll_target, + h6_income_values=h6_income_values, + h6_revenue_target=h6_revenue_target, n_ages=n_ages, max_iters=100, tol=1e-6, verbose=False, ) - if year in display_years and (USE_SS or USE_PAYROLL): + if year in display_years and (USE_SS or USE_PAYROLL or USE_H6_REFORM): if USE_SS: ss_achieved = np.sum(ss_values * w_new) print( @@ -267,6 +495,18 @@ print( f" [DEBUG {year}] Payroll achieved: ${payroll_achieved/1e9:.1f}B (error: {(payroll_achieved - payroll_target)/payroll_target*100:.1f}%)" ) + if USE_H6_REFORM and h6_revenue_target is not None: + h6_revenue_achieved = np.sum(h6_income_values * w_new) + error_pct = ( + (h6_revenue_achieved - h6_revenue_target) + / abs(h6_revenue_target) + * 100 + if h6_revenue_target != 0 + else 0 + ) + print( + f" [DEBUG {year}] H6 achieved revenue: ${h6_revenue_achieved/1e9:.3f}B (error: {error_pct:.1f}%)" + ) weights_matrix[:, year_idx] = w_new baseline_weights_matrix[:, year_idx] = baseline_weights diff --git a/policyengine_us_data/datasets/cps/long_term/ssa_data.py 
b/policyengine_us_data/datasets/cps/long_term/ssa_data.py index 248e9dc9..46582262 100644 --- a/policyengine_us_data/datasets/cps/long_term/ssa_data.py +++ b/policyengine_us_data/datasets/cps/long_term/ssa_data.py @@ -71,3 +71,21 @@ def load_taxable_payroll_projections(year): row = df[df["year"] == year] nominal_billions = row["taxable_payroll_in_billion_nominal_usd"].values[0] return nominal_billions * 1e9 + + +def load_h6_income_rate_change(year): + """ + Load H6 reform income rate change target for a given year. + + Args: + year: Year to load rate change for + + Returns: + H6 income rate change as decimal (e.g., -0.0018 for -0.18%) + """ + csv_path = STORAGE_FOLDER / "social_security_aux.csv" + df = pd.read_csv(csv_path) + + row = df[df["year"] == year] + # CSV stores as percentage (e.g., -0.18), convert to decimal + return row["h6_income_rate_change"].values[0] / 100 diff --git a/policyengine_us_data/storage/README.md b/policyengine_us_data/storage/README.md index dfe5576d..2b0da6b9 100644 --- a/policyengine_us_data/storage/README.md +++ b/policyengine_us_data/storage/README.md @@ -25,5 +25,7 @@ - **social_security_aux.csv** • Source: SSA Single Year supplementary tables • Date: 2025 Trustees Report - • Location: https://www.ssa.gov/oact/tr/2025/lrIndex.html + • Locations: + - https://www.ssa.gov/oact/tr/2025/lrIndex.html + - `https://www.ssa.gov/oact/solvency/provisions/tables/table_run133.html` • Notes: Contains OASDI cost projections and taxable payroll data (2025-2100) diff --git a/policyengine_us_data/storage/social_security_aux.csv b/policyengine_us_data/storage/social_security_aux.csv index abe4be38..07f875fb 100644 --- a/policyengine_us_data/storage/social_security_aux.csv +++ b/policyengine_us_data/storage/social_security_aux.csv @@ -1,77 +1,77 @@ -year,oasdi_cost_in_billion_2025_usd,cpi_w_intermediate,oasdi_cost_in_billion_nominal_usd,taxable_payroll_in_billion_nominal_usd -2025,1609,100,1609,10621.00 -2026,1660,102.49,1701.334,11129.00 
-2027,1715,104.95,1799.8925,11627.00 -2028,1763,107.47,1894.6961,12159.00 -2029,1810,110.05,1991.905,12696.00 -2030,1856,112.69,2091.5264,13239.00 -2031,1903,115.4,2196.062,13798.00 -2032,1947,118.17,2300.7699,14380.00 -2033,1991,121,2409.11,14987.00 -2034,2032,123.91,2517.8512,15594.00 -2035,2073,126.88,2630.2224,16205.00 -2036,2114,129.93,2746.7202,16825.00 -2037,2155,133.04,2867.012,17465.00 -2038,2194,136.24,2989.1056,18132.00 -2039,2233,139.51,3115.2583,18819.00 -2040,2270,142.86,3242.922,19532.00 -2041,2306,146.28,3373.2168,20269.00 -2042,2342,149.79,3508.0818,21035.00 -2043,2378,153.39,3647.6142,21828.00 -2044,2415,157.07,3793.2405,22653.00 -2045,2452,160.84,3943.7968,23507.00 -2046,2488,164.7,4097.736,24391.00 -2047,2527,168.65,4261.7855,25313.00 -2048,2567,172.7,4433.209,26270.00 -2049,2609,176.85,4614.0165,27263.00 -2050,2652,181.09,4802.5068,28300.00 -2051,2696,185.44,4999.4624,29376.00 -2052,2743,189.89,5208.6827,30494.00 -2053,2792,194.44,5428.7648,31661.00 -2054,2842,199.11,5658.7062,32869.00 -2055,2895,203.89,5902.6155,34124.00 -2056,2950,208.78,6159.01,35432.00 -2057,3007,213.79,6428.6653,36790.00 -2058,3066,218.93,6712.3938,38201.00 -2059,3125,224.18,7005.625,39670.00 -2060,3184,229.56,7309.1904,41196.00 -2061,3243,235.07,7623.3201,42782.00 -2062,3303,240.71,7950.6513,44429.00 -2063,3362,246.49,8286.9938,46136.00 -2064,3422,252.4,8637.128,47902.00 -2065,3483,258.46,9002.1618,49733.00 -2066,3544,264.66,9379.5504,51631.00 -2067,3607,271.02,9775.6914,53598.00 -2068,3670,277.52,10184.984,55637.00 -2069,3735,284.18,10614.123,57746.00 -2070,3801,291,11060.91,59930.00 -2071,3867,297.99,11523.2733,62196.00 -2072,3934,305.14,12004.2076,64543.00 -2073,4002,312.46,12504.6492,66975.00 -2074,4071,319.96,13025.5716,69501.00 -2075,4139,327.64,13561.0196,72131.00 -2076,4206,335.5,14111.13,74862.00 -2077,4273,343.55,14679.8915,77698.00 -2078,4339,351.8,15264.602,80650.00 -2079,4403,360.24,15861.3672,83727.00 -2080,4467,368.89,16478.3163,86933.00 
-2081,4530,377.74,17111.622,90268.00 -2082,4593,386.81,17766.1833,93749.00 -2083,4655,396.09,18437.9895,97381.00 -2084,4716,405.6,19128.096,101163.00 -2085,4775,415.33,19832.0075,105104.00 -2086,4833,425.3,20554.749,109217.00 -2087,4891,435.51,21300.7941,113504.00 -2088,4948,445.96,22066.1008,117973.00 -2089,5006,456.66,22860.3996,122629.00 -2090,5064,467.62,23680.2768,127477.00 -2091,5125,478.84,24540.55,132518.00 -2092,5188,490.34,25438.8392,137764.00 -2093,5254,502.1,26380.334,143215.00 -2094,5323,514.16,27368.7368,148876.00 -2095,5396,526.49,28409.4004,154754.00 -2096,5472,539.13,29501.1936,160855.00 -2097,5551,552.07,30645.4057,167185.00 -2098,5633,565.32,31844.4756,173750.00 -2099,5719,578.89,33106.7191,180557.00 -2100,5809,592.78,34434.5902,187614.00 +year,oasdi_cost_in_billion_2025_usd,cpi_w_intermediate,oasdi_cost_in_billion_nominal_usd,taxable_payroll_in_billion_nominal_usd,h6_income_rate_change +2025,1609,100,1609,10621,0 +2026,1660,102.49,1701.334,11129,0 +2027,1715,104.95,1799.8925,11627,0 +2028,1763,107.47,1894.6961,12159,0 +2029,1810,110.05,1991.905,12696,0 +2030,1856,112.69,2091.5264,13239,0 +2031,1903,115.4,2196.062,13798,0 +2032,1947,118.17,2300.7699,14380,0 +2033,1991,121,2409.11,14987,0 +2034,2032,123.91,2517.8512,15594,0 +2035,2073,126.88,2630.2224,16205,0 +2036,2114,129.93,2746.7202,16825,0 +2037,2155,133.04,2867.012,17465,0 +2038,2194,136.24,2989.1056,18132,0 +2039,2233,139.51,3115.2583,18819,0 +2040,2270,142.86,3242.922,19532,0 +2041,2306,146.28,3373.2168,20269,0 +2042,2342,149.79,3508.0818,21035,0 +2043,2378,153.39,3647.6142,21828,0 +2044,2415,157.07,3793.2405,22653,0 +2045,2452,160.84,3943.7968,23507,-0.07 +2046,2488,164.7,4097.736,24391,-0.12 +2047,2527,168.65,4261.7855,25313,-0.18 +2048,2567,172.7,4433.209,26270,-0.23 +2049,2609,176.85,4614.0165,27263,-0.27 +2050,2652,181.09,4802.5068,28300,-0.32 +2051,2696,185.44,4999.4624,29376,-0.36 +2052,2743,189.89,5208.6827,30494,-0.4 +2053,2792,194.44,5428.7648,31661,-0.43 
+2054,2842,199.11,5658.7062,32869,-1 +2055,2895,203.89,5902.6155,34124,-1.01 +2056,2950,208.78,6159.01,35432,-1.01 +2057,3007,213.79,6428.6653,36790,-1.02 +2058,3066,218.93,6712.3938,38201,-1.03 +2059,3125,224.18,7005.625,39670,-1.04 +2060,3184,229.56,7309.1904,41196,-1.04 +2061,3243,235.07,7623.3201,42782,-1.05 +2062,3303,240.71,7950.6513,44429,-1.06 +2063,3362,246.49,8286.9938,46136,-1.06 +2064,3422,252.4,8637.128,47902,-1.07 +2065,3483,258.46,9002.1618,49733,-1.07 +2066,3544,264.66,9379.5504,51631,-1.08 +2067,3607,271.02,9775.6914,53598,-1.09 +2068,3670,277.52,10184.984,55637,-1.09 +2069,3735,284.18,10614.123,57746,-1.1 +2070,3801,291,11060.91,59930,-1.1 +2071,3867,297.99,11523.2733,62196,-1.11 +2072,3934,305.14,12004.2076,64543,-1.12 +2073,4002,312.46,12504.6492,66975,-1.12 +2074,4071,319.96,13025.5716,69501,-1.13 +2075,4139,327.64,13561.0196,72131,-1.13 +2076,4206,335.5,14111.13,74862,-1.14 +2077,4273,343.55,14679.8915,77698,-1.14 +2078,4339,351.8,15264.602,80650,-1.14 +2079,4403,360.24,15861.3672,83727,-1.15 +2080,4467,368.89,16478.3163,86933,-1.15 +2081,4530,377.74,17111.622,90268,-1.15 +2082,4593,386.81,17766.1833,93749,-1.15 +2083,4655,396.09,18437.9895,97381,-1.15 +2084,4716,405.6,19128.096,101163,-1.15 +2085,4775,415.33,19832.0075,105104,-1.15 +2086,4833,425.3,20554.749,109217,-1.14 +2087,4891,435.51,21300.7941,113504,-1.14 +2088,4948,445.96,22066.1008,117973,-1.14 +2089,5006,456.66,22860.3996,122629,-1.13 +2090,5064,467.62,23680.2768,127477,-1.13 +2091,5125,478.84,24540.55,132518,-1.13 +2092,5188,490.34,25438.8392,137764,-1.12 +2093,5254,502.1,26380.334,143215,-1.12 +2094,5323,514.16,27368.7368,148876,-1.12 +2095,5396,526.49,28409.4004,154754,-1.12 +2096,5472,539.13,29501.1936,160855,-1.12 +2097,5551,552.07,30645.4057,167185,-1.11 +2098,5633,565.32,31844.4756,173750,-1.11 +2099,5719,578.89,33106.7191,180557,-1.12 +2100,5809,592.78,34434.5902,187614,-1.12 diff --git a/tests/test_h6_reform.py b/tests/test_h6_reform.py new file mode 100644 index 
00000000..7253ed97 --- /dev/null +++ b/tests/test_h6_reform.py @@ -0,0 +1,248 @@ +""" +Tests for H6 Social Security reform threshold crossover logic. + +The H6 reform phases out OASDI taxation from 2045-2053 while preserving +HI taxation. This requires careful handling when OASDI thresholds rise +above HI thresholds, necessitating parameter swapping. + +These tests validate the mathematical logic without requiring full +policyengine imports (which need heavy dependencies like torch). +""" + +import pytest + + +# Constants from the H6 reform implementation +HI_SINGLE = 34_000 +HI_JOINT = 44_000 + + +def calculate_oasdi_thresholds(year: int) -> tuple[int, int]: + """Calculate OASDI thresholds for a given year during phase-out.""" + if year < 2045 or year > 2053: + raise ValueError("Phase-out only applies to 2045-2053") + + i = year - 2045 + oasdi_single = 32_500 + (7_500 * i) + oasdi_joint = 65_000 + (15_000 * i) + return oasdi_single, oasdi_joint + + +def get_swapped_thresholds( + oasdi_threshold: int, hi_threshold: int +) -> tuple[int, int]: + """ + Apply min/max swap to handle threshold crossover. + + Returns (base_threshold, adjusted_threshold) where base <= adjusted. 
+ """ + return min(oasdi_threshold, hi_threshold), max( + oasdi_threshold, hi_threshold + ) + + +def needs_crossover_swap(oasdi_threshold: int, hi_threshold: int) -> bool: + """Check if OASDI threshold has crossed above HI threshold.""" + return oasdi_threshold > hi_threshold + + +class TestH6ThresholdCalculation: + """Test OASDI threshold progression during phase-out.""" + + def test_2045_single_threshold(self): + """2045 single OASDI threshold should be $32,500.""" + oasdi_single, _ = calculate_oasdi_thresholds(2045) + assert oasdi_single == 32_500 + + def test_2045_joint_threshold(self): + """2045 joint OASDI threshold should be $65,000.""" + _, oasdi_joint = calculate_oasdi_thresholds(2045) + assert oasdi_joint == 65_000 + + def test_2053_single_threshold(self): + """2053 single OASDI threshold should be $92,500.""" + oasdi_single, _ = calculate_oasdi_thresholds(2053) + assert oasdi_single == 92_500 + + def test_2053_joint_threshold(self): + """2053 joint OASDI threshold should be $185,000.""" + _, oasdi_joint = calculate_oasdi_thresholds(2053) + assert oasdi_joint == 185_000 + + def test_threshold_progression_single(self): + """Single thresholds should increase by $7,500 per year.""" + expected = { + 2045: 32_500, + 2046: 40_000, + 2047: 47_500, + 2048: 55_000, + 2049: 62_500, + 2050: 70_000, + 2051: 77_500, + 2052: 85_000, + 2053: 92_500, + } + for year, expected_val in expected.items(): + oasdi_single, _ = calculate_oasdi_thresholds(year) + assert oasdi_single == expected_val, f"Year {year}" + + def test_threshold_progression_joint(self): + """Joint thresholds should increase by $15,000 per year.""" + expected = { + 2045: 65_000, + 2046: 80_000, + 2047: 95_000, + 2048: 110_000, + 2049: 125_000, + 2050: 140_000, + 2051: 155_000, + 2052: 170_000, + 2053: 185_000, + } + for year, expected_val in expected.items(): + _, oasdi_joint = calculate_oasdi_thresholds(year) + assert oasdi_joint == expected_val, f"Year {year}" + + +class TestH6ThresholdCrossover: + 
"""Test the threshold crossover detection and handling. + + Key insight: During phase-out, OASDI thresholds rise above HI thresholds. + - HI thresholds are frozen at $34k single / $44k joint + - Joint filers cross immediately (2045: $65k > $44k) + - Single filers cross in 2046 ($40k > $34k) + """ + + def test_2045_single_no_crossover(self): + """In 2045, single OASDI ($32.5k) is below HI ($34k) - no swap needed.""" + oasdi_single, _ = calculate_oasdi_thresholds(2045) + assert not needs_crossover_swap(oasdi_single, HI_SINGLE) + assert oasdi_single < HI_SINGLE + + def test_2045_joint_has_crossover(self): + """In 2045, joint OASDI ($65k) exceeds HI ($44k) - swap needed.""" + _, oasdi_joint = calculate_oasdi_thresholds(2045) + assert needs_crossover_swap(oasdi_joint, HI_JOINT) + assert oasdi_joint > HI_JOINT + + def test_2046_single_has_crossover(self): + """In 2046, single OASDI ($40k) exceeds HI ($34k) - swap needed.""" + oasdi_single, _ = calculate_oasdi_thresholds(2046) + assert needs_crossover_swap(oasdi_single, HI_SINGLE) + assert oasdi_single > HI_SINGLE + + def test_all_years_joint_crossover(self): + """Joint filers have crossover in all phase-out years.""" + for year in range(2045, 2054): + _, oasdi_joint = calculate_oasdi_thresholds(year) + assert needs_crossover_swap(oasdi_joint, HI_JOINT), f"Year {year}" + + def test_single_crossover_starts_2046(self): + """Single filers cross over starting in 2046.""" + # 2045: no crossover + oasdi_2045, _ = calculate_oasdi_thresholds(2045) + assert not needs_crossover_swap(oasdi_2045, HI_SINGLE) + + # 2046+: crossover + for year in range(2046, 2054): + oasdi_single, _ = calculate_oasdi_thresholds(year) + assert needs_crossover_swap( + oasdi_single, HI_SINGLE + ), f"Year {year}" + + +class TestH6ThresholdSwapping: + """Test min/max swap ensures base <= adjusted_base.""" + + def test_swap_when_oasdi_higher(self): + """When OASDI > HI, swap puts HI in base slot.""" + oasdi = 65_000 + hi = 44_000 + base, adjusted = 
get_swapped_thresholds(oasdi, hi) + assert base == hi == 44_000 + assert adjusted == oasdi == 65_000 + assert base <= adjusted + + def test_no_swap_when_oasdi_lower(self): + """When OASDI < HI, no swap needed.""" + oasdi = 32_500 + hi = 34_000 + base, adjusted = get_swapped_thresholds(oasdi, hi) + assert base == oasdi == 32_500 + assert adjusted == hi == 34_000 + assert base <= adjusted + + def test_swap_preserves_ordering_all_years(self): + """Swapped thresholds always maintain base <= adjusted.""" + for year in range(2045, 2054): + oasdi_single, oasdi_joint = calculate_oasdi_thresholds(year) + + base_s, adj_s = get_swapped_thresholds(oasdi_single, HI_SINGLE) + base_j, adj_j = get_swapped_thresholds(oasdi_joint, HI_JOINT) + + assert base_s <= adj_s, f"Single ordering violated in {year}" + assert base_j <= adj_j, f"Joint ordering violated in {year}" + + +class TestH6RateSwapping: + """Test rate swapping logic during transition. + + Key insight: PolicyEngine requires one rate structure per year. + When thresholds cross, we swap to (0.35, 0.85) to minimize error. 
+ """ + + def test_2045_error_analysis(self): + """In 2045, swapped rates minimize error vs default rates.""" + # 2045 situation: + # Single: OASDI=$32.5k, HI=$34k -> $1.5k range affected + # Joint: OASDI=$65k, HI=$44k -> $21k range affected + + # With swapped rates (0.35/0.85 instead of 0.50/0.85): + # Single: undertax by 15% on $1.5k = $225 + # With default rates (0.50/0.85): + # Joint: overtax by 15% on $21k = $3,150 + + single_range = 34_000 - 32_500 # $1,500 + joint_range = 65_000 - 44_000 # $21,000 + + rate_diff = 0.50 - 0.35 # 15% + + single_error_swapped = single_range * rate_diff # $225 undertax + joint_error_default = joint_range * rate_diff # $3,150 overtax + + assert single_error_swapped == pytest.approx(225) + assert joint_error_default == pytest.approx(3_150) + assert joint_error_default / single_error_swapped == pytest.approx( + 14.0 + ), "Swapped rates should have 14x less error" + + def test_swapped_rates_align_with_tax_cut_intent(self): + """Swapped rates undertax (not overtax), aligning with reform intent.""" + # H6 is a tax cut - undertaxing is more aligned with legislative intent + # than overtaxing would be + single_undertax = (34_000 - 32_500) * 0.15 # $225 + assert single_undertax > 0 # Positive = undertax (taxpayer-favorable) + + +class TestH6EliminationPhase: + """Test the elimination phase parameters (2054 onward).""" + + def test_elimination_thresholds(self): + """From 2054 onward, only HI thresholds remain active.""" + # Base thresholds = HI ($34k/$44k) + # Adjusted thresholds = very high (effectively disabled) + INFINITY_THRESHOLD = 9_999_999 + + assert HI_SINGLE == 34_000 + assert HI_JOINT == 44_000 + assert INFINITY_THRESHOLD > HI_SINGLE * 100 + assert INFINITY_THRESHOLD > HI_JOINT * 100 + + def test_elimination_rates(self): + """From 2054 onward, both tiers use 35% (HI-only rate).""" + HI_RATE = 0.35 + OASDI_RATE = 0.50 # eliminated + + # In elimination phase, tier 1 = 35%, tier 2 = 35% (no additional) + assert HI_RATE == 0.35 + assert HI_RATE +
OASDI_RATE == 0.85 # was combined rate