diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index 0f82eb65..c3810620 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -1,4 +1,5 @@
-- bump: patch
+- bump: minor
   changes:
-    fixed:
-    - Versioning workflow checkout for push events
+    added:
+    - Support for health_insurance_premiums_without_medicare_part_b in local area calibration
+    - Test coverage for sparse matrix builder with person-level targets
diff --git a/docs/local_area_calibration_setup.ipynb b/docs/local_area_calibration_setup.ipynb
index cdd1cc97..9060a3df 100644
--- a/docs/local_area_calibration_setup.ipynb
+++ b/docs/local_area_calibration_setup.ipynb
@@ -459,10 +459,10 @@
     "print(\"Remember, this is a North Carolina target:\\n\")\n",
     "print(targets_df.iloc[row_loc])\n",
     "\n",
-    "print(\"\\nHousehold donated to NC's 2nd district, 2023 SNAP dollars:\")\n",
+    "print(\"\\nNC State target. Household donated to NC's 2nd district, 2023 SNAP dollars:\")\n",
     "print(X_sparse[row_loc, positions['3702']])  # Household donated to NC's 2nd district\n",
     "\n",
-    "print(\"\\nHousehold donated to NC's 2nd district, 2023 SNAP dollars:\")\n",
+    "print(\"\\nSame target, same household, donated to AK's at Large district, 2023 SNAP dollars:\")\n",
     "print(X_sparse[row_loc, positions['201']])  # Household donated to AK's at Large District"
    ]
   },
diff --git a/modal_app/fit_weights.py b/modal_app/fit_weights.py
new file mode 100644
index 00000000..842223a1
--- /dev/null
+++ b/modal_app/fit_weights.py
@@ -0,0 +1,151 @@
+import os
+import subprocess
+import modal
+
+app = modal.App("policyengine-us-data-fit-weights")
+
+hf_secret = modal.Secret.from_name("huggingface-token")
+
+image = (
+    modal.Image.debian_slim(python_version="3.11")
+    .apt_install("git")
+    .pip_install("uv")
+)
+
+REPO_URL = "https://github.com/PolicyEngine/policyengine-us-data.git"
+
+
+def _fit_weights_impl(branch: str, epochs: int) -> bytes:
+    """Shared implementation for weight fitting."""
+    os.chdir("/root")
+    subprocess.run(["git", "clone", "-b", branch, REPO_URL], check=True)
+    os.chdir("policyengine-us-data")
+
+    subprocess.run(["uv", "sync", "--extra", "l0"], check=True)
+
+    print("Downloading calibration inputs from HuggingFace...")
+    download_result = subprocess.run(
+        [
+            "uv", "run", "python", "-c",
+            "from policyengine_us_data.utils.huggingface import "
+            "download_calibration_inputs; "
+            "paths = download_calibration_inputs('/root/calibration_data'); "
+            "print(f\"DB: {paths['database']}\"); "
+            "print(f\"DATASET: {paths['dataset']}\")"
+        ],
+        capture_output=True,
+        text=True,
+        env=os.environ.copy(),
+    )
+    print(download_result.stdout)
+    if download_result.stderr:
+        print("Download STDERR:", download_result.stderr)
+    if download_result.returncode != 0:
+        raise RuntimeError(f"Download failed: {download_result.returncode}")
+
+    db_path = dataset_path = None
+    for line in download_result.stdout.split('\n'):
+        if line.startswith('DB:'):
+            db_path = line.split('DB:')[1].strip()
+        elif line.startswith('DATASET:'):
+            dataset_path = line.split('DATASET:')[1].strip()
+
+    script_path = (
+        "policyengine_us_data/datasets/cps/"
+        "local_area_calibration/fit_calibration_weights.py"
+    )
+    result = subprocess.run(
+        [
+            "uv", "run", "python", script_path,
+            "--device", "cuda",
+            "--epochs", str(epochs),
+            "--db-path", db_path,
+            "--dataset-path", dataset_path,
+        ],
+        capture_output=True,
+        text=True,
+        env=os.environ.copy(),
+    )
+    print(result.stdout)
+    if result.stderr:
+        print("STDERR:", result.stderr)
+    if result.returncode != 0:
+        raise RuntimeError(f"Script failed with code {result.returncode}")
+
+    output_line = [
+        line for line in result.stdout.split('\n') if 'OUTPUT_PATH:' in line
+    ][0]
+    output_path = output_line.split('OUTPUT_PATH:')[1].strip()
+
+    with open(output_path, 'rb') as f:
+        return f.read()
+
+
+@app.function(
+    image=image, secrets=[hf_secret], memory=32768, cpu=4.0,
+    gpu="T4", timeout=14400,
+)
+def fit_weights_t4(branch: str = "main", epochs: int = 200) -> bytes:
+    return _fit_weights_impl(branch, epochs)
+
+
+@app.function(
+    image=image, secrets=[hf_secret], memory=32768, cpu=4.0,
+    gpu="A10", timeout=14400,
+)
+def fit_weights_a10(branch: str = "main", epochs: int = 200) -> bytes:
+    return _fit_weights_impl(branch, epochs)
+
+
+@app.function(
+    image=image, secrets=[hf_secret], memory=32768, cpu=4.0,
+    gpu="A100-40GB", timeout=14400,
+)
+def fit_weights_a100_40(branch: str = "main", epochs: int = 200) -> bytes:
+    return _fit_weights_impl(branch, epochs)
+
+
+@app.function(
+    image=image, secrets=[hf_secret], memory=32768, cpu=4.0,
+    gpu="A100-80GB", timeout=14400,
+)
+def fit_weights_a100_80(branch: str = "main", epochs: int = 200) -> bytes:
+    return _fit_weights_impl(branch, epochs)
+
+
+@app.function(
+    image=image, secrets=[hf_secret], memory=32768, cpu=4.0,
+    gpu="H100", timeout=14400,
+)
+def fit_weights_h100(branch: str = "main", epochs: int = 200) -> bytes:
+    return _fit_weights_impl(branch, epochs)
+
+
+GPU_FUNCTIONS = {
+    "T4": fit_weights_t4,
+    "A10": fit_weights_a10,
+    "A100-40GB": fit_weights_a100_40,
+    "A100-80GB": fit_weights_a100_80,
+    "H100": fit_weights_h100,
+}
+
+
+@app.local_entrypoint()
+def main(
+    branch: str = "main",
+    epochs: int = 200,
+    gpu: str = "T4",
+    output: str = "calibration_weights.npy"
+):
+    if gpu not in GPU_FUNCTIONS:
+        raise ValueError(
+            f"Unknown GPU: {gpu}. Choose from: {list(GPU_FUNCTIONS.keys())}"
+        )
+
+    print(f"Running with GPU: {gpu}, epochs: {epochs}, branch: {branch}")
+    func = GPU_FUNCTIONS[gpu]
+    weights_bytes = func.remote(branch=branch, epochs=epochs)
+
+    with open(output, 'wb') as f:
+        f.write(weights_bytes)
+    print(f"Weights saved to: {output}")
diff --git a/policyengine_us_data/datasets/cps/local_area_calibration/ADDING_CALIBRATION_TARGETS.md b/policyengine_us_data/datasets/cps/local_area_calibration/ADDING_CALIBRATION_TARGETS.md
new file mode 100644
index 00000000..59d486f9
--- /dev/null
+++ b/policyengine_us_data/datasets/cps/local_area_calibration/ADDING_CALIBRATION_TARGETS.md
@@ -0,0 +1,290 @@
+# Adding Calibration Targets to Local Area Geo-Stacking
+
+This document summarizes key learnings from adding `health_insurance_premiums_without_medicare_part_b` as a calibration target. Use this as a reference when adding future targets.
+
+## Key Discovery: No Code Changes Needed for Most Targets
+
+The `sparse_matrix_builder.py` is **already entity-agnostic**. PolicyEngine's `map_to="household"` parameter automatically handles aggregation from any entity level (person, tax_unit, spm_unit) to household level.
+
+```python
+# This call in sparse_matrix_builder.py (lines 220-222) handles ALL entity types:
+target_values = state_sim.calculate(
+    target["variable"], map_to="household"
+).values
+```
+
+**Verified behavior:**
+- Person-level variables (like health_insurance_premiums): automatically summed to household
+- SPM-unit variables (like snap): automatically aggregated to household
+- Household variables: returned as-is
+
+## Architecture Overview
+
+### File Locations
+
+```
+policyengine_us_data/
+├── storage/
+│   ├── calibration/
+│   │   └── policy_data.db          # Target database (SQLite)
+│   └── stratified_extended_cps_2023.h5  # Base dataset for calibration
+└── datasets/cps/local_area_calibration/
+    ├── sparse_matrix_builder.py    # Builds X_sparse matrix (GENERIC)
+    ├── matrix_tracer.py            # Diagnostics for debugging matrices
+    ├── calibration_utils.py        # Helper functions
+    └── build_calibration_matrix.py # Runner script
+```
+
+### Database Schema (policy_data.db)
+
+```sql
+-- Core tables
+targets(target_id, variable, period, stratum_id, value, active, ...)
+strata(stratum_id, definition_hash, stratum_group_id, ...)
+stratum_constraints(stratum_id, constraint_variable, operation, value, ...)
+```
+
+**Key stratum_group_ids:**
+- Group 1: National hardcoded targets (20 variables including health insurance, medicaid, snap national, etc.)
+- Group 4: SNAP state/CD targets (538 targets: 51 state snap + 487 household_count)
+
+### Target Filter Logic
+
+The `build_matrix()` method uses **OR logic** for filters:
+
+```python
+# Gets SNAP targets OR health insurance target
+target_filter={
+    "stratum_group_ids": [4],  # All SNAP targets
+    "variables": ["health_insurance_premiums_without_medicare_part_b"],  # Specific variable
+}
+```
+
+## How to Add a New Target
+
+### Step 1: Check if Target Exists in Database
+
+```python
+import sqlite3
+from policyengine_us_data.storage import STORAGE_FOLDER
+
+conn = sqlite3.connect(STORAGE_FOLDER / "calibration" / "policy_data.db")
+cursor = conn.cursor()
+
+# Find your target
+cursor.execute("""
+    SELECT t.target_id, t.variable, t.value, t.period, t.stratum_id,
+           s.stratum_group_id
+    FROM targets t
+    JOIN strata s ON t.stratum_id = s.stratum_id
+    WHERE t.variable = 'your_variable_name'
+""")
+print(cursor.fetchall())
+
+# Check constraints for that stratum
+cursor.execute("""
+    SELECT * FROM stratum_constraints WHERE stratum_id = <stratum_id>
+""")
+print(cursor.fetchall())
+```
+
+### Step 2: Determine Entity Type
+
+```python
+from policyengine_us import Microsimulation
+
+sim = Microsimulation()
+var = sim.tax_benefit_system.variables['your_variable_name']
+print(f"Entity: {var.entity.key}")  # person, household, tax_unit, spm_unit, etc.
+```
+
+### Step 3: Verify Aggregation Works
+
+```python
+import numpy as np  # for non-household variables, verify totals are preserved
+person_total = sim.calculate('your_variable', 2023, map_to='person').values.sum()
+household_total = sim.calculate('your_variable', 2023, map_to='household').values.sum()
+print(f"Match: {np.isclose(person_total, household_total, rtol=1e-6)}")
+```
+
+### Step 4: Update the Runner Script
+
+Edit `build_calibration_matrix.py` to include your new target:
+
+```python
+targets_df, X_sparse, household_id_mapping = builder.build_matrix(
+    sim,
+    target_filter={
+        "stratum_group_ids": [4],  # SNAP
+        "variables": [
+            "health_insurance_premiums_without_medicare_part_b",
+            "your_new_variable",  # Add here
+        ],
+    },
+)
+```
+
+### Step 5: Run and Verify
+
+```bash
+cd policyengine_us_data/datasets/cps/local_area_calibration
+python build_calibration_matrix.py
+```
+
+## When Code Changes ARE Needed
+
+The current implementation may need modification for:
+
+1. **Count variables with special semantics**: Variables ending in `_count` might need `.nunique()` instead of `.sum()` for aggregation. The legacy "junkyard" implementation (see "The Junkyard Reference" below) handles this, but our current builder doesn't.
+
+2. **Variables with state-specific calculations**: SNAP and Medicaid are already handled (state_fips is set before calculation). Other state-dependent variables should work the same way.
+
+3. **Constraint evaluation at non-household level**: Currently all constraints are evaluated at household level after aggregation. If you need person-level constraint evaluation (e.g., "only count persons with income > X"), the junkyard has this pattern but our builder doesn't.
+
+## The Junkyard Reference
+
+Location (in the author's separate legacy clone of this repo, nicknamed the "junkyard"): `~/devl/policyengine-us-data/policyengine_us_data/datasets/cps/local_area_calibration/metrics_matrix_geo_stacking_sparse.py`
+
+This 2,400+ line file has extensive logic we intentionally avoided:
+- Hard-coded variable names and stratum_group_ids
+- Complex entity relationship tracking
+- Person-level constraint evaluation with `.any()` aggregation
+
+**Key pattern from junkyard (if ever needed):**
+```python
+# Dynamic entity detection
+target_entity = sim.tax_benefit_system.variables[target_variable].entity.key
+
+# Entity relationship DataFrame
+entity_rel = pd.DataFrame({
+    "person_id": sim.calculate("person_id", map_to="person").values,
+    "household_id": sim.calculate("household_id", map_to="person").values,
+    "tax_unit_id": sim.calculate("tax_unit_id", map_to="person").values,
+    # ... other entities
+})
+
+# For counts: use .nunique() on entity IDs
+# For amounts: use .sum() on values
+```
+
+## Matrix Structure
+
+The sparse matrix X has shape `(n_targets, n_households × n_cds)`:
+
+```
+Columns: [CD1_hh0, CD1_hh1, ..., CD1_hhN, CD2_hh0, ..., CDM_hhN]
+Rows: One per target (geographic_id + variable combination)
+
+Column index formula: col_idx = cd_idx * n_households + hh_idx
+```
+
+Use `MatrixTracer` for debugging:
+```python
+from matrix_tracer import MatrixTracer
+
+tracer = MatrixTracer(targets_df, X_sparse, household_id_mapping, cds_to_calibrate, sim)
+tracer.print_matrix_structure()
+tracer.get_column_info(100)  # Info about column 100
+tracer.get_row_info(0)       # Info about row 0 (first target)
+```
+
+## Environment Setup
+
+```bash
+# Use the sep environment for this repo (author's local paths; adjust to your machine)
+source ~/envs/sep/bin/activate
+
+# Run from the local_area_calibration directory
+cd ~/devl/sep/policyengine-us-data/policyengine_us_data/datasets/cps/local_area_calibration
+
+# Run tests
+pytest ../../tests/test_sparse_matrix_builder.py -v
+```
+
+## Common Queries
+
+### List all target variables
+```sql
+SELECT DISTINCT variable FROM targets;
+```
+
+### List all constraint variables
+```sql
+SELECT DISTINCT constraint_variable FROM stratum_constraints;
+```
+
+### Find targets by geographic level
+```sql
+-- National targets (no geographic constraints)
+SELECT t.* FROM targets t
+JOIN strata s ON t.stratum_id = s.stratum_id
+WHERE t.stratum_id NOT IN (
+    SELECT stratum_id FROM stratum_constraints
+    WHERE constraint_variable IN ('state_fips', 'congressional_district_geoid')
+);
+
+-- State-level targets
+SELECT t.* FROM targets t
+WHERE t.stratum_id IN (
+    SELECT stratum_id FROM stratum_constraints
+    WHERE constraint_variable = 'state_fips'
+);
+```
+
+## Summary
+
+For most new targets:
+1. Verify target exists in `policy_data.db`
+2. Add variable name to the target filter in `build_calibration_matrix.py`
+3. Run and verify with `MatrixTracer`
+
+No code changes to `sparse_matrix_builder.py` needed unless you have special aggregation or constraint requirements.
+
+## Running Weight Calibration on Modal (GPU)
+
+The `fit_calibration_weights.py` script can be run on Modal with GPU acceleration using `modal_app/fit_weights.py`.
+
+### Basic Usage
+
+```bash
+# Default: T4 GPU, 200 epochs
+modal run modal_app/fit_weights.py --branch main --epochs 200
+
+# Specify GPU type
+modal run modal_app/fit_weights.py --branch main --epochs 2000 --gpu A100-40GB
+```
+
+### GPU Benchmarks (200 epochs, 2 target groups, Jan 2026)
+
+| GPU | Time | Cost | Notes |
+|-----|------|------|-------|
+| T4 | 16m 4s | $0.16 | Best for small test runs |
+| A100-40GB | 9m 5s | $0.32 | ~43% less wall-clock time than T4 |
+| A100-80GB | 10m 28s | $0.44 | Slower than 40GB (variance?) |
+
+### Key Findings
+
+1. **Memory bandwidth matters for sparse operations**: The P100 (not available on Modal) outperforms T4 by ~2x on Kaggle due to HBM2 memory (~732 GB/s) vs GDDR6 (~320 GB/s).
+
+2. **Significant overhead at low epochs**: With only 200 epochs, much of the runtime is fixed overhead:
+   - Git clone and `uv sync` (~2-3 min)
+   - HuggingFace data download (~1 min)
+   - Loading Microsimulation and building sparse matrix (~3-4 min, CPU-bound)
+
+3. **GPU choice depends on epoch count**:
+   - **< 500 epochs**: Use T4 (cheapest, overhead dominates)
+   - **500-2000 epochs**: A100-40GB may break even
+   - **> 2000 epochs**: A100 likely more cost-effective as training dominates
+
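+4. **Available Modal GPUs** (roughly by memory bandwidth; the `--gpu` flag of `modal_app/fit_weights.py` currently accepts only T4, A10, A100-40GB, A100-80GB, and H100):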
+   - T4: 320 GB/s, $0.000164/sec
+   - L4: 300 GB/s, $0.000222/sec
+   - A10: 600 GB/s, $0.000306/sec
+   - L40S: 864 GB/s, $0.000542/sec
+   - A100-40GB: 1,555 GB/s, $0.000583/sec
+   - A100-80GB: 2,039 GB/s, $0.000694/sec
+   - H100: 3,350 GB/s, $0.001097/sec
+
+### Output
+
+Weights are saved locally to `calibration_weights.npy` (configurable via `--output` flag).
diff --git a/policyengine_us_data/datasets/cps/local_area_calibration/build_calibration_matrix.py b/policyengine_us_data/datasets/cps/local_area_calibration/build_calibration_matrix.py
new file mode 100644
index 00000000..185914a5
--- /dev/null
+++ b/policyengine_us_data/datasets/cps/local_area_calibration/build_calibration_matrix.py
@@ -0,0 +1,195 @@
+"""
+Build calibration matrix for geo-stacking reweighting.
+Generates X_sparse and target vector, prints diagnostics using MatrixTracer.
+"""
+
+import logging
+
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+
+import numpy as np
+import pandas as pd
+from policyengine_us import Microsimulation
+from policyengine_us_data.storage import STORAGE_FOLDER
+from sparse_matrix_builder import SparseMatrixBuilder
+from matrix_tracer import MatrixTracer
+from calibration_utils import create_target_groups
+
+# ============================================================================
+# CONFIGURATION
+# ============================================================================
+db_path = STORAGE_FOLDER / "calibration" / "policy_data.db"
+db_uri = f"sqlite:///{db_path}"
+time_period = 2023
+
+# Base dataset for geo-stacking: stratified extended CPS
+dataset_path = STORAGE_FOLDER / "stratified_extended_cps_2023.h5"
+
+cds_to_calibrate = [
+    "101",  # Alabama CD-1
+    "601",  # California CD-1
+    "602",  # California CD-2
+    "3601",  # New York CD-1
+    "4801",  # Texas CD-1
+]
+
+print(f"Testing with {len(cds_to_calibrate)} congressional districts")
+
+# ============================================================================
+# STEP 1: LOAD SIMULATION FROM EXTENDED CPS
+# ============================================================================
+print(f"\nLoading simulation from {dataset_path}...")
+sim = Microsimulation(dataset=str(dataset_path))
+n_households = len(sim.calculate("household_id", map_to="household").values)
+print(f"Loaded {n_households:,} households")
+
+# ============================================================================
+# STEP 2: BUILD SPARSE MATRIX WITH COMBINED TARGETS
+# ============================================================================
+print("\nBuilding sparse matrix...")
+builder = SparseMatrixBuilder(
+    db_uri=db_uri,
+    time_period=time_period,
+    cds_to_calibrate=cds_to_calibrate,
+    dataset_path=None,
+)
+
+# SNAP targets (stratum_group_id=4) + specific health insurance variable
+# Uses OR logic: gets all SNAP targets OR the health insurance target
+targets_df, X_sparse, household_id_mapping = builder.build_matrix(
+    sim,
+    target_filter={
+        "stratum_group_ids": [4],
+        "variables": ["health_insurance_premiums_without_medicare_part_b"],
+    },
+)
+
+print(f"\nMatrix built successfully:")
+print(f"  Shape: {X_sparse.shape}")
+print(f"  Targets: {len(targets_df)}")
+nnz = X_sparse.nnz
+total = X_sparse.shape[0] * X_sparse.shape[1]
+print(f"  Sparsity: {1 - nnz / total:.4%}")
+
+# ============================================================================
+# STEP 3: EXTRACT TARGET VECTOR
+# ============================================================================
+target_vector = targets_df["value"].values
+print(f"\nTarget vector shape: {target_vector.shape}")
+print(f"Target total: ${target_vector.sum():,.0f}")
+
+# ============================================================================
+# STEP 4: HEALTH INSURANCE PREMIUM VERIFICATION
+# ============================================================================
+print("\n" + "=" * 80)
+print("HEALTH INSURANCE PREMIUM TARGET ANALYSIS")
+print("=" * 80)
+
+health_ins_targets = targets_df[
+    targets_df["variable"]
+    == "health_insurance_premiums_without_medicare_part_b"
+]
+
+if len(health_ins_targets) > 0:
+    print(f"\nFound {len(health_ins_targets)} health insurance target(s):")
+    print(
+        health_ins_targets[
+            [
+                "target_id",
+                "variable",
+                "value",
+                "geographic_id",
+                "stratum_group_id",
+            ]
+        ]
+    )
+
+    health_ins_idx = health_ins_targets.index[0]
+    health_ins_row = X_sparse[health_ins_idx, :]
+
+    print(f"\nMatrix row {health_ins_idx} (health insurance):")
+    print(f"  Non-zero entries: {health_ins_row.nnz:,}")
+    print(f"  Row sum: ${health_ins_row.sum():,.0f}")
+    print(f"  Target value: ${health_ins_targets.iloc[0]['value']:,.0f}")
+
+    person_total = sim.calculate(
+        "health_insurance_premiums_without_medicare_part_b",
+        time_period,
+        map_to="person",
+    ).values.sum()
+
+    household_total = sim.calculate(
+        "health_insurance_premiums_without_medicare_part_b",
+        time_period,
+        map_to="household",
+    ).values.sum()
+
+    print(f"\nEntity aggregation verification:")
+    print(f"  Person-level total:    ${person_total:,.0f}")
+    print(f"  Household-level total: ${household_total:,.0f}")
+    print(f"  Match: {np.isclose(person_total, household_total, rtol=1e-6)}")
+
+else:
+    print("\nWARNING: No health insurance targets found!")
+
+# ============================================================================
+# STEP 5: SNAP TARGET SUMMARY
+# ============================================================================
+print("\n" + "=" * 80)
+print("SNAP TARGET SUMMARY")
+print("=" * 80)
+
+snap_targets = targets_df[targets_df["variable"] == "snap"]
+household_count_targets = targets_df[
+    targets_df["variable"] == "household_count"
+]
+
+print(f"\nSNAP benefit targets: {len(snap_targets)}")
+print(f"Household count targets: {len(household_count_targets)}")
+
+if len(snap_targets) > 0:
+    print(f"\nSNAP total (all states): ${snap_targets['value'].sum():,.0f}")
+    print(f"\nSample SNAP targets:")
+    print(
+        snap_targets[["target_id", "variable", "value", "geographic_id"]].head(
+            10
+        )
+    )
+
+# ============================================================================
+# STEP 6: USE MATRIX TRACER FOR DETAILED DIAGNOSTICS
+# ============================================================================
+print("\n" + "=" * 80)
+print("MATRIX TRACER DIAGNOSTICS")
+print("=" * 80)
+
+tracer = MatrixTracer(
+    targets_df=targets_df,
+    matrix=X_sparse,
+    household_id_mapping=household_id_mapping,
+    geographic_ids=cds_to_calibrate,
+    sim=sim,
+)
+
+tracer.print_matrix_structure(show_groups=True)
+
+# ============================================================================
+# STEP 7: TARGET GROUP ANALYSIS
+# ============================================================================
+print("\n" + "=" * 80)
+print("TARGET GROUP ANALYSIS")
+print("=" * 80)
+
+target_groups, group_info = create_target_groups(targets_df)
+
+print(f"\nTotal target groups: {len(group_info)}")
+for group_id, info in enumerate(group_info):
+    group_mask = target_groups == group_id
+    n_targets_in_group = group_mask.sum()
+    print(f"  Group {group_id}: {info} ({n_targets_in_group} targets)")
+
+print("\n" + "=" * 80)
+print("RUNNER COMPLETED SUCCESSFULLY")
+print("=" * 80)
diff --git a/policyengine_us_data/datasets/cps/local_area_calibration/conversation_about_this_repo.txt b/policyengine_us_data/datasets/cps/local_area_calibration/conversation_about_this_repo.txt
new file mode 100644
index 00000000..3242ef91
--- /dev/null
+++ b/policyengine_us_data/datasets/cps/local_area_calibration/conversation_about_this_repo.txt
@@ -0,0 +1,2147 @@
+
+ ▐▛███▜▌   Claude Code v2.1.7
+▝▜█████▛▘  Haiku 4.5 · Claude Max
+  ▘▘ ▝▝    ~/…/policyengine_us_data/datasets/cps/local_area_calibration
+
+❯ We need to bring in a new target to our calibration efforts, and this     
+  will be manifested in the X matrix that is created from                   
+  sparse_matrix_builder.py in this directory. We have only test `snap` so   
+  far, and you can see that demonstrated in                                 
+  ~/docs/local_area_calibration_setup.ipynb. But now we want to bring in    
+  health_insurance_premiums_without_medicare_part_b. We want to fully       
+  understand what kind of variable this is from our target database (used   
+  to construct the target vector), which you can find in                    
+  policyengine_us_data/storage/calibration/policy_data.db and see examples  
+  for how to query it at                                                    
+  /home/baogorek/devl/code-snippets/reweighting/querying_policy_data_db.md. 
+  This variable is defined at a different unit of analysis than `snap`, so  
+  you need to be mindful of that. One more thing to be aware of: while we   
+  are branched off the mainline in this repo, clone at                      
+  ~/devl/sep/policyengine-us-data, there is a "junkyard" clone of this same 
+  repo at ~/devl/policyengine-us-data, which contains                       
+  /home/baogorek/devl/policyengine-us-data/policyengine_us_data/datasets/cp 
+  s/local_area_calibration/metrics_matrix_geo_stacking_sparse.py, a massive 
+  2,000+ line file that contains logic to handle all of our targets, but    
+  it does it in a very idiosycratic way, hard-coding target names in the    
+  module. Our sparse_matrix_builder.py must never do this! We want to use   
+  our policy_data.db to handle any target and create that sparse matrix.    
+  Due to the size of the matrix creator in the "junkyard", and the          
+  existance of two repos, this is a difficult task. Use the uv environment  
+  ~/envs/sep for this branch and task, and use ~/envs/pe for the            
+  "junkyard". Plan out the addition of this new target, and Ultrathink!     
+
+● I'll help you plan the addition of the health_insurance_premiums_without_medicare_part_b target to your calibration system. This is a complex task that requires understanding multiple components across two repositories.
+
+  Let me start by exploring the codebase to understand the current implementation and requirements.
+
+● 3 Explore agents finished (ctrl+o to expand)
+   ├─ Understand current sparse_matrix_builder implementation · 17 tool uses ·
+   27.6k tokens
+   │  ⎿  Done
+   ├─ Understand policy_data.db structure · 40 tool uses · 40.6k tokens
+   │  ⎿  Done
+   └─ Explore junkyard implementation · 40 tool uses · 52.5k tokens
+      ⎿  Done
+
+● Excellent! I've gathered comprehensive information about the current implementation, database structure, and junkyard patterns. Now let me launch a Plan agent to design the implementation approach.
+
+● Plan(Design health insurance premiums integration)
+  ⎿  Done (36 tool uses · 46.4k tokens · 4m 36s)                         
+
+
+● Let me verify the Plan agent's findings by reading the key files directly.
+
+● Read(sparse_matrix_builder.py)
+  ⎿  Read 239 lines
+
+● Read(~/docs/local_area_calibration_setup.ipynb)
+  ⎿  Error reading file
+
+● Bash(cd /home/baogorek/devl/sep/policyengine-us-data && ~/envs/sep/bin/python
+      -c "
+      import sqlite3…)
+  ⎿ === Health Insurance Premium Targets ===                             
+    (5, 'health_insurance_premiums_without_medicare_part_b',
+    385000000000.0
+    , 2024, 1, 1)
+    … +3 lines (ctrl+o to expand)
+  ⎿ Shell cwd was reset to /home/baogorek/devl/sep/policyengine-us-data/po
+    licyengine_us_data/datasets/cps/local_area_calibration
+
+● Search(pattern: "~/docs/*.ipynb")
+  ⎿  Found 0 files 
+
+● Read(~/devl/code-snippets/reweighting/querying_policy_data_db.md)
+  ⎿  Read 96 lines
+
+● Bash(find /home/baogorek/docs -name "*.ipynb" -type f 2>/dev/null | head -10)
+  ⎿  (No content)
+
+● Bash(cd /home/baogorek/devl/sep/policyengine-us-data && ~/envs/sep/bin/python
+      -c "
+      import sqlite3…)
+  ⎿  === Stratum Group Analysis ===                                      
+
+     Stratum group 1 (health insurance stratum):
+     … +56 lines (ctrl+o to expand)
+  ⎿ Shell cwd was reset to /home/baogorek/devl/sep/policyengine-us-data/po
+    licyengine_us_data/datasets/cps/local_area_calibration
+
+● Bash(cd /home/baogorek/devl/sep/policyengine-us-data && ~/envs/sep/bin/python
+      -c "
+      from policyengine_us import Microsimulation…)
+  ⎿  === Entity Aggregation Test ===                                     
+     Person-level array shape: (52826,)
+     Household-level array shape: (21045,)
+     … +6 lines (ctrl+o to expand)
+  ⎿ Shell cwd was reset to /home/baogorek/devl/sep/policyengine-us-data/po
+    licyengine_us_data/datasets/cps/local_area_calibration
+
+● Excellent! I've now verified the Plan agent's key finding. Let me create the implementation plan and ask some clarifying questions.
+
+● Updated plan
+  ⎿  Wrote 267 lines to                                                  
+     ../../../../../../../.claude/plans/velvet-rolling-dawn.md
+      1 # Implementation Plan: Adding Health Insurance Premiums to Calibr
+        ation
+      2 ## Executive Summary
+      3 **Surprising Finding**: The current `sparse_matrix_builder.py` im
+        plementation **already supports person-level targets** without an
+        y code changes needed. PolicyEngine's `map_to="household"` parame
+        ter automatically aggregates person-level values to household lev
+        el while preserving totals.
+      4 ## Background
+      5 ### Current State
+      6 - **Working targets**: SNAP (stratum_group_id=4, spm_unit-level, 
+        538 targets: 51 state snap amounts + 487 household_count targets)
+     … +257 lines (ctrl+o to expand)
+                                                                         
+● User answered Claude's questions:
+  ⎿  · Do you want to calibrate using only the existing national health 
+     insurance premium target ($385B), or should state-level targets be 
+     added to the database first? → National target only (Recommended)
+     · How do you plan to use the health insurance premium target in your 
+     calibration? → Combined with SNAP
+
+     · The existing health insurance target has no geographic constraints 
+     (same target for all CDs). Is this the desired behavior? → Yes, 
+     national is correct
+
+
+● Excellent! Based on your answers, let me finalize the implementation plan.
+
+● Updated plan
+  ⎿  /plan to preview                                                    
+      250  - ✓ Total preservation is guaranteed by PolicyEngine's aggrega
+           tion logic
+      251  - ✓ Generic architecture means extensible to any future entity
+            types
+      252  
+      253 -## Next Steps (Post-Approval)                                 
+      253 +## User Decisions                                             
+      254  
+      255 -1. **Update usage examples** to show health insurance premium 
+          -calibration                                                   
+      256 -2. **Test with combined targets** (SNAP + health insurance) to
+          - verify OR filter logic                                       
+      257 -3. **Document target filtering patterns** for future reference
+      258 -4. **(Optional) Add state-level health insurance targets** to 
+          -database if geographic variation is needed                    
+      255 +✓ **Target Scope**: Use national target only (existing target_
+          +id=5, no database changes)                                    
+      256 +✓ **Usage Pattern**: Combined with SNAP (test multi-target cal
+          +ibration)                                                     
+      257 +✓ **Geographic Scope**: National aggregate is correct (no stat
+          +e variation needed)                                           
+      258  
+      260 -## Questions for User                                         
+      259 +## Implementation Steps (Post-Approval)                       
+      260  
+      262 -Before finalizing this plan, clarification needed on:         
+      261 +### Step 1: Verify Database Target                            
+      262 +Confirm health insurance premium target exists and is active: 
+      263 +```python                                                     
+      264 +# Using ~/envs/sep environment                                
+      265 +import sqlite3                                                
+      266 +conn = sqlite3.connect('policyengine_us_data/storage/calibrati
+          +on/policy_data.db')                                           
+      267 +cursor = conn.cursor()                                        
+      268 +cursor.execute("""                                            
+      269 +    SELECT target_id, variable, value, period, stratum_id, act
+          +ive                                                           
+      270 +    FROM targets                                              
+      271 +    WHERE variable = 'health_insurance_premiums_without_medica
+          +re_part_b'                                                    
+      272 +""")                                                          
+      273 +print(cursor.fetchone())                                      
+      274 +```                                                           
+      275 +Expected: `(5, 'health_insurance_premiums_without_medicare_par
+          +t_b', 385000000000.0, 2024, 1, 1)`                            
+      276  
+      264 -1. **Target Scope**: Do you want ONLY the national health insu
+          -rance target, or should state-level targets be added to the da
+          -tabase?                                                       
+      265 -2. **Usage Pattern**: Will you calibrate using health insuranc
+          -e alone, or combined with SNAP targets?                       
+      266 -3. **Geographic Constraints**: The current target has no geogr
+          -aphic constraints (applies to all CDs equally). Is this correc
+          -t, or should premiums vary by state/CD?                       
+      277 +### Step 2: Test Combined Target Matrix Building              
+      278 +Create a test script to build matrix with both SNAP and health
+          + insurance:                                                   
+      279 +                                                              
+      280 +```python                                                     
+      281 +from policyengine_us import Microsimulation                   
+      282 +from policyengine_us_data.datasets.cps.local_area_calibration.
+          +sparse_matrix_builder import SparseMatrixBuilder              
+      283 +                                                              
+      284 +# Load simulation                                             
+      285 +sim = Microsimulation()                                       
+      286 +                                                              
+      287 +# Create builder                                              
+      288 +builder = SparseMatrixBuilder(                                
+      289 +    db_uri="sqlite:///policyengine_us_data/storage/calibration
+          +/policy_data.db",                                             
+      290 +    time_period=2024,                                         
+      291 +    cds_to_calibrate=['0101', '0601', '0602'],  # Example CDs:
+          + AL-1, CA-1, CA-2                                             
+      292 +    dataset_path=None                                         
+      293 +)                                                             
+      294 +                                                              
+      295 +# Build matrix with SNAP + health insurance                   
+      296 +targets_df, X_sparse, household_mapping = builder.build_matrix
+          +(                                                             
+      297 +    sim,                                                      
+      298 +    target_filter={"stratum_group_ids": [1, 4]}  # National + 
+          +SNAP targets                                                  
+      299 +)                                                             
+      300 +                                                              
+      301 +print(f"Total targets: {len(targets_df)}")                    
+      302 +print(f"Matrix shape: {X_sparse.shape}")                      
+      303 +print(f"Sparsity: {1 - X_sparse.nnz / (X_sparse.shape[0] * X_s
+          +parse.shape[1]):.2%}")                                        
+      304 +                                                              
+      305 +# Verify health insurance target is included                  
+      306 +health_ins = targets_df[                                      
+      307 +    targets_df["variable"] == "health_insurance_premiums_witho
+          +ut_medicare_part_b"                                           
+      308 +]                                                             
+      309 +print(f"\nHealth insurance target:")                          
+      310 +print(health_ins[["target_id", "variable", "value", "geographi
+          +c_id"]])                                                      
+      311 +```                                                           
+      312 +                                                              
+      313 +### Step 3: Verify Entity Aggregation                         
+      314 +Confirm person-to-household aggregation works correctly:      
+      315 +                                                              
+      316 +```python                                                     
+      317 +# Verify totals are preserved                                 
+      318 +person_total = sim.calculate(                                 
+      319 +    'health_insurance_premiums_without_medicare_part_b',      
+      320 +    2024,                                                     
+      321 +    map_to='person'                                           
+      322 +).values.sum()                                                
+      323 +                                                              
+      324 +household_total = sim.calculate(                              
+      325 +    'health_insurance_premiums_without_medicare_part_b',      
+      326 +    2024,                                                     
+      327 +    map_to='household'                                        
+      328 +).values.sum()                                                
+      329 +                                                              
+      330 +print(f"Person-level total: ${person_total:,.0f}")            
+      331 +print(f"Household-level total: ${household_total:,.0f}")      
+      332 +print(f"Match: {abs(person_total - household_total) < 1}")    
+      333 +                                                              
+      334 +# Check matrix row sum for health insurance                   
+      335 +health_ins_idx = targets_df[                                  
+      336 +    targets_df["variable"] == "health_insurance_premiums_witho
+          +ut_medicare_part_b"                                           
+      337 +].index[0]                                                    
+      338 +matrix_total = X_sparse[health_ins_idx, :].sum() * len(cds_to_
+          +calibrate)  # Adjust for stacking                             
+      339 +print(f"Matrix row total (adjusted): ${matrix_total:,.0f}")   
+      340 +```                                                           
+      341 +                                                              
+      342 +### Step 4: Update Documentation                              
+      343 +Add example to `docs/local_area_calibration_setup.ipynb` showi
+          +ng:
+      344 +- How to filter for multiple target groups                    
+      345 +- How person-level targets work alongside spm_unit-level targe
+          +ts                                                            
+      346 +- Interpretation of the resulting matrix structure            
+      347 +                                                              
+      348 +### Step 5: Calibration Integration                           
+      349 +Use the matrix in calibration workflow:                       
+      350 +```python                                                     
+      351 +# After building matrix with combined targets                 
+      352 +from your_calibration_module import solve_weights             
+      353 +                                                              
+      354 +# Solve for weights                                           
+      355 +weights = solve_weights(X_sparse, targets_df["value"].values) 
+      356 +                                                              
+      357 +# Verify calibration matches targets                          
+      358 +calibrated = X_sparse.T @ weights                             
+      359 +targets = targets_df["value"].values                          
+      360 +                                                              
+      361 +for idx, row in targets_df.iterrows():                        
+      362 +    print(f"{row['variable']}: target={row['value']:,.0f}, cal
+          +ibrated={calibrated[idx]:,.0f}")                              
+      363 +```                                                           
+      364 +                                                              
+      365 +## Success Criteria                                           
+      366 +                                                              
+      367 +1. ✓ Matrix builds successfully with `{"stratum_group_ids": [1
+          +, 4]}` filter                                                 
+      368 +2. ✓ Health insurance row appears in targets_df with correct v
+          +alue ($385B)                                                  
+      369 +3. ✓ Matrix shape is (n_targets, n_households × n_cds) where n
+          +_targets ≈ 558 (20 national + 538 SNAP)                       
+      370 +4. ✓ Person-to-household aggregation preserves totals         
+      371 +5. ✓ Matrix row for health insurance contains household-aggreg
+          +ated premium values                                           
+      372 +6. ✓ No code changes needed (only usage pattern demonstration)
+
+● Updated plan
+  ⎿  /plan to preview                                                    
+       1  # Implementation Plan: Adding Health Insurance Premiums to Cali
+          bration
+       2  
+       3 +## TL;DR                                                       
+       4 +                                                               
+       5 +**ZERO CODE CHANGES NEEDED** 🎉                                
+       6 +                                                               
+       7 +The current `sparse_matrix_builder.py` already supports person-
+         +level targets through PolicyEngine's `map_to="household"` param
+         +eter, which automatically aggregates person-level values to hou
+         +sehold level. Simply use:                                      
+       8 +                                                               
+       9 +```python                                                      
+      10 +builder.build_matrix(sim, target_filter={"stratum_group_ids": [
+         +1, 4]})  # SNAP + health insurance                             
+      11 +```                                                            
+      12 +                                                               
+      13 +This works because:                                            
+      14 +- The health insurance premium target is in the database (targe
+         +t_id=5, $385B)
+      15 +- PolicyEngine auto-aggregates person→household (preserves tota
+         +ls)                                                            
+      16 +- No hard-coded variable names in the builder                  
+      17 +                                                               
+      18  ## Executive Summary
+      19  
+       5 -**Surprising Finding**: The current `sparse_matrix_builder.py` 
+         -implementation **already supports person-level targets** withou
+         -t any code changes needed. PolicyEngine's `map_to="household"` 
+         -parameter automatically aggregates person-level values to house
+         -hold level while preserving totals.                            
+      20 +**Key Finding**: The current `sparse_matrix_builder.py` impleme
+         +ntation **already supports person-level targets** without any c
+         +ode changes needed. PolicyEngine's `map_to="household"` paramet
+         +er automatically aggregates person-level values to household le
+         +vel while preserving totals.                                   
+      21  
+      22  ## Background
+      23  
+
+● Updated plan
+  ⎿  /plan to preview                                                    
+      385  4. ✓ Person-to-household aggregation preserves totals
+      386  5. ✓ Matrix row for health insurance contains household-aggreg
+           ated premium values
+      387  6. ✓ No code changes needed (only usage pattern demonstration)
+      388 +                                                              
+      389 +## Quick Reference                                            
+      390 +                                                              
+      391 +### Critical Files                                            
+      392 +- **No changes**: `sparse_matrix_builder.py` (already supports
+          + person-level targets)                                        
+      393 +- **Database**: `policyengine_us_data/storage/calibration/poli
+          +cy_data.db` (contains target_id=5)                            
+      394 +- **Environment**: `~/envs/sep` (for this development branch) 
+      395 +                                                              
+      396 +### Key Commands                                              
+      397 +```bash                                                       
+      398 +# Activate environment                                        
+      399 +source ~/envs/sep/bin/activate                                
+      400 +                                                              
+      401 +# Query database for health insurance target                  
+      402 +cd ~/devl/sep/policyengine-us-data                            
+      403 +python -c "import sqlite3; conn = sqlite3.connect('policyengin
+          +e_us_data/storage/calibration/policy_data.db'); cursor = conn.
+          +cursor(); cursor.execute('SELECT * FROM targets WHERE variable
+          + = \"health_insurance_premiums_without_medicare_part_b\"'); pr
+          +int(cursor.fetchone())"                                       
+      404 +                                                              
+      405 +# Run verification tests                                      
+      406 +python test_combined_targets.py  # Create this test script per
+          + Step 2 above                                                 
+      407 +```                                                           
+      408 +                                                              
+      409 +### Target Filter Examples                                    
+      410 +```python                                                     
+      411 +# SNAP only                                                   
+      412 +{"stratum_group_ids": [4]}                                    
+      413 +                                                              
+      414 +# Health insurance only                                       
+      415 +{"variables": ["health_insurance_premiums_without_medicare_par
+          +t_b"]}                                                        
+      416 +                                                              
+      417 +# SNAP + national targets incl. health insurance (recommended)
+      418 +{"stratum_group_ids": [1, 4]}                                 
+      419 +```                                                           
+
+● Perfect! I've completed a comprehensive investigation and created a detailed implementation plan. The surprising finding is that no code changes are needed - your sparse_matrix_builder.py already supports person-level targets through PolicyEngine's automatic entity aggregation.
+
+  Key discoveries:
+  1. The health insurance premiums variable is person-level (verified via PolicyEngine metadata)
+  2. PolicyEngine's map_to="household" automatically aggregates person values to household level
+  3. Totals are preserved (verified: $56,465,248 at both person and household levels)
+  4. Database already contains the target (target_id=5, $385B, stratum_group_id=1)
+  5. Your generic architecture works perfectly for any entity type
+
+  The plan includes:
+  - Complete verification of entity aggregation
+  - Step-by-step testing procedures
+  - Combined SNAP + health insurance usage pattern
+  - Success criteria and quick reference commands
+  ⎿ Tool use rejected with user message: Do not do Step 5, rather, please create
+     a "runner" program that generates X_sparse and the target vector and prints
+     some diagnostics. You can use the first 100 lines of 
+    /home/baogorek/devl/policyengine-us-data/policyengine_us_data/datasets/cps/l
+    ocal_area_calibration/calibrate_cds_sparse.py (from the junkyard) as an 
+    example. Remember that you have access to the tracer in this repo as well.
+
+
+● Read(~/devl/policyengine-us-data/policyengine_us_data/datasets/cps/local_area_
+      calibration/calibrate_cds_sparse.py)
+  ⎿  Read 100 lines
+
+● Search(pattern: "**/tracer*.py")
+  ⎿  Found 0 files 
+
+● Search(pattern: "**/*tracer*.py")
+  ⎿  Found 1 file (ctrl+o to expand)
+
+● Read(matrix_tracer.py)
+  ⎿  Read 384 lines
+
+● Updated plan
+  ⎿  /plan to preview                                                    
+      360  - How person-level targets work alongside spm_unit-level targe
+           ts
+      361  - Interpretation of the resulting matrix structure
+      362  
+      363 -### Step 5: Calibration Integration                           
+      364 -Use the matrix in calibration workflow:                       
+      363 +### Step 5: Create Runner Program with Diagnostics            
+      364 +Create a runner script `test_combined_targets.py` that:       
+      365 +- Builds X_sparse with SNAP + health insurance targets        
+      366 +- Extracts target vector                                      
+      367 +- Uses MatrixTracer to print diagnostics                      
+      368 +                                                              
+      369 +Pattern based on first 100 lines of junkyard's `calibrate_cds_
+          +sparse.py`:                                                   
+      370 +                                                              
+      371  ```python
+      366 -# After building matrix with combined targets                 
+      367 -from your_calibration_module import solve_weights             
+      372 +"""                                                           
+      373 +Test runner for combined SNAP + health insurance calibration t
+          +argets.                                                       
+      374 +Generates X_sparse and target vector, prints diagnostics using
+          + MatrixTracer.                                                
+      375 +"""                                                           
+      376 +import logging                                                
+      377 +from pathlib import Path                                      
+      378  
+      369 -# Solve for weights                                           
+      370 -weights = solve_weights(X_sparse, targets_df["value"].values) 
+      379 +logging.basicConfig(                                          
+      380 +    level=logging.INFO,                                       
+      381 +    format="%(asctime)s - %(levelname)s - %(message)s"        
+      382 +)                                                             
+      383  
+      372 -# Verify calibration matches targets                          
+      373 -calibrated = X_sparse.T @ weights                             
+      374 -targets = targets_df["value"].values                          
+      384 +import numpy as np                                            
+      385 +import pandas as pd                                           
+      386 +from policyengine_us import Microsimulation                   
+      387 +from policyengine_us_data.datasets.cps.local_area_calibration.
+          +sparse_matrix_builder import SparseMatrixBuilder              
+      388 +from policyengine_us_data.datasets.cps.local_area_calibration.
+          +matrix_tracer import MatrixTracer                             
+      389 +from policyengine_us_data.datasets.cps.local_area_calibration.
+          +calibration_utils import (                                    
+      390 +    create_target_groups,                                     
+      391 +)                                                             
+      392  
+      376 -for idx, row in targets_df.iterrows():                        
+      377 -    print(f"{row['variable']}: target={row['value']:,.0f}, cal
+          -ibrated={calibrated[idx]:,.0f}")                              
+      393 +# ============================================================
+          +================                                              
+      394 +# CONFIGURATION                                               
+      395 +# ============================================================
+          +================                                              
+      396 +db_path = "policyengine_us_data/storage/calibration/policy_dat
+          +a.db"                                                         
+      397 +db_uri = f"sqlite:///{db_path}"                               
+      398 +time_period = 2024                                            
+      399 +                                                              
+      400 +# Test with a few CDs from different states                   
+      401 +cds_to_calibrate = [                                          
+      402 +    '0101',  # Alabama CD-1                                   
+      403 +    '0601',  # California CD-1                                
+      404 +    '0602',  # California CD-2                                
+      405 +    '3601',  # New York CD-1                                  
+      406 +    '4801',  # Texas CD-1                                     
+      407 +]                                                             
+      408 +                                                              
+      409 +print(f"Testing with {len(cds_to_calibrate)} congressional dis
+          +tricts")                                                      
+      410 +                                                              
+      411 +# ============================================================
+          +================                                              
+      412 +# STEP 1: LOAD SIMULATION                                     
+      413 +# ============================================================
+          +================                                              
+      414 +print("\nLoading simulation...")                              
+      415 +sim = Microsimulation()                                       
+      416 +n_households = len(sim.calculate("household_id", map_to="house
+          +hold").values)                                                
+      417 +print(f"Loaded {n_households:,} households")                  
+      418 +                                                              
+      419 +# ============================================================
+          +================                                              
+      420 +# STEP 2: BUILD SPARSE MATRIX WITH COMBINED TARGETS           
+      421 +# ============================================================
+          +================                                              
+      422 +print("\nBuilding sparse matrix...")                          
+      423 +builder = SparseMatrixBuilder(                                
+      424 +    db_uri=db_uri,                                            
+      425 +    time_period=time_period,                                  
+      426 +    cds_to_calibrate=cds_to_calibrate,                        
+      427 +    dataset_path=None                                         
+      428 +)                                                             
+      429 +                                                              
+      430 +# Query both SNAP (stratum_group_id=4) and national targets (s
+          +tratum_group_id=1)                                            
+      431 +targets_df, X_sparse, household_id_mapping = builder.build_mat
+          +rix(                                                          
+      432 +    sim,                                                      
+      433 +    target_filter={"stratum_group_ids": [1, 4]}               
+      434 +)                                                             
+      435 +                                                              
+      436 +print(f"\nMatrix built successfully:")                        
+      437 +print(f"  Shape: {X_sparse.shape}")                           
+      438 +print(f"  Targets: {len(targets_df)}")                        
+      439 +print(f"  Sparsity: {1 - X_sparse.nnz / (X_sparse.shape[0] * X
+          +_sparse.shape[1]):.4%}")                                      
+      440 +                                                              
+      441 +# ============================================================
+          +================                                              
+      442 +# STEP 3: EXTRACT TARGET VECTOR                               
+      443 +# ============================================================
+          +================                                              
+      444 +target_vector = targets_df["value"].values                    
+      445 +print(f"\nTarget vector shape: {target_vector.shape}")        
+      446 +print(f"Target total: ${target_vector.sum():,.0f}")           
+      447 +                                                              
+      448 +# ============================================================
+          +================                                              
+      449 +# STEP 4: HEALTH INSURANCE PREMIUM VERIFICATION               
+      450 +# ============================================================
+          +================                                              
+      451 +print("\n" + "="*80)                                          
+      452 +print("HEALTH INSURANCE PREMIUM TARGET ANALYSIS")             
+      453 +print("="*80)                                                 
+      454 +                                                              
+      455 +health_ins_targets = targets_df[                              
+      456 +    targets_df["variable"] == "health_insurance_premiums_witho
+          +ut_medicare_part_b"                                           
+      457 +]                                                             
+      458 +                                                              
+      459 +if len(health_ins_targets) > 0:                               
+      460 +    print(f"\nFound {len(health_ins_targets)} health insurance
+          + target(s):")                                                 
+      461 +    print(health_ins_targets[["target_id", "variable", "value"
+          +, "geographic_id", "stratum_group_id"]])                      
+      462 +                                                              
+      463 +    # Get the row index for health insurance                  
+      464 +    health_ins_idx = health_ins_targets.index[0]              
+      465 +    health_ins_row = X_sparse[health_ins_idx, :]              
+      466 +                                                              
+      467 +    print(f"\nMatrix row {health_ins_idx} (health insurance):"
+          +)                                                             
+      468 +    print(f"  Non-zero entries: {health_ins_row.nnz:,}")      
+      469 +    print(f"  Row sum: ${health_ins_row.sum():,.0f}")         
+      470 +    print(f"  Target value: ${health_ins_targets.iloc[0]['valu
+          +e']:,.0f}")                                                   
+      471 +                                                              
+      472 +    # Verify person-to-household aggregation                  
+      473 +    person_total = sim.calculate(                             
+      474 +        'health_insurance_premiums_without_medicare_part_b',  
+      475 +        time_period,                                          
+      476 +        map_to='person'                                       
+      477 +    ).values.sum()                                            
+      478 +                                                              
+      479 +    household_total = sim.calculate(                          
+      480 +        'health_insurance_premiums_without_medicare_part_b',  
+      481 +        time_period,                                          
+      482 +        map_to='household'                                    
+      483 +    ).values.sum()                                            
+      484 +                                                              
+      485 +    print(f"\nEntity aggregation verification:")              
+      486 +    print(f"  Person-level total:    ${person_total:,.0f}")   
+      487 +    print(f"  Household-level total: ${household_total:,.0f}")
+      488 +    print(f"  Match: {abs(person_total - household_total) < 1}
+          +")                                                            
+      489 +                                                              
+      490 +else:                                                         
+      491 +    print("\nWARNING: No health insurance targets found!")    
+      492 +                                                              
+      493 +# ============================================================
+          +================                                              
+      494 +# STEP 5: SNAP TARGET SUMMARY                                 
+      495 +# ============================================================
+          +================                                              
+      496 +print("\n" + "="*80)                                          
+      497 +print("SNAP TARGET SUMMARY")                                  
+      498 +print("="*80)                                                 
+      499 +                                                              
+      500 +snap_targets = targets_df[targets_df["variable"] == "snap"]   
+      501 +household_count_targets = targets_df[targets_df["variable"] ==
+          + "household_count"]                                           
+      502 +                                                              
+      503 +print(f"\nSNAP benefit targets: {len(snap_targets)}")         
+      504 +print(f"Household count targets: {len(household_count_targets)
+          +}")                                                           
+      505 +                                                              
+      506 +if len(snap_targets) > 0:                                     
+      507 +    print(f"\nSNAP total (all states): ${snap_targets['value']
+          +.sum():,.0f}")                                                
+      508 +    print(f"\nSample SNAP targets:")                          
+      509 +    print(snap_targets[["target_id", "variable", "value", "geo
+          +graphic_id"]].head(10))                                       
+      510 +                                                              
+      511 +# ============================================================
+          +================                                              
+      512 +# STEP 6: USE MATRIX TRACER FOR DETAILED DIAGNOSTICS          
+      513 +# ============================================================
+          +================                                              
+      514 +print("\n" + "="*80)                                          
+      515 +print("MATRIX TRACER DIAGNOSTICS")                            
+      516 +print("="*80)                                                 
+      517 +                                                              
+      518 +tracer = MatrixTracer(                                        
+      519 +    targets_df=targets_df,                                    
+      520 +    matrix=X_sparse,                                          
+      521 +    household_id_mapping=household_id_mapping,                
+      522 +    geographic_ids=cds_to_calibrate,                          
+      523 +    sim=sim                                                   
+      524 +)                                                             
+      525 +                                                              
+      526 +# Print comprehensive matrix structure                        
+      527 +tracer.print_matrix_structure(show_groups=True)               
+      528 +                                                              
+      529 +# ============================================================
+          +================                                              
+      530 +# STEP 7: TARGET GROUP ANALYSIS                               
+      531 +# ============================================================
+          +================                                              
+      532 +print("\n" + "="*80)                                          
+      533 +print("TARGET GROUP ANALYSIS")                                
+      534 +print("="*80)                                                 
+      535 +                                                              
+      536 +target_groups, group_info = create_target_groups(targets_df)  
+      537 +                                                              
+      538 +print(f"\nTotal target groups: {len(group_info)}")            
+      539 +for group_id, info in enumerate(group_info):                  
+      540 +    group_mask = target_groups == group_id                    
+      541 +    n_targets_in_group = group_mask.sum()                     
+      542 +    print(f"  Group {group_id}: {info} ({n_targets_in_group} t
+          +argets)")                                                     
+      543 +                                                              
+      544 +print("\n" + "="*80)                                          
+      545 +print("RUNNER COMPLETED SUCCESSFULLY")                        
+      546 +print("="*80)                                                 
+      547  ```
+      548  
+      549 +**Key Features**:                                             
+      550 +- Loads simulation once                                       
+      551 +- Builds matrix with combined targets using OR filter         
+      552 +- Verifies health insurance person→household aggregation      
+      553 +- Uses MatrixTracer for comprehensive diagnostics             
+      554 +- Prints target group analysis                                
+      555 +- Follows junkyard pattern but with new sparse_matrix_builder 
+      556 +                                                              
+      557  ## Success Criteria
+      558  
+      559  1. ✓ Matrix builds successfully with `{"stratum_group_ids": [1
+           , 4]}` filter
+
+● Updated plan
+  ⎿  /plan to preview                                                    
+      556  
+      557  ## Success Criteria
+      558  
+      559 -1. ✓ Matrix builds successfully with `{"stratum_group_ids": [1
+          -, 4]}` filter                                                 
+      560 -2. ✓ Health insurance row appears in targets_df with correct v
+          -alue ($385B)                                                  
+      561 -3. ✓ Matrix shape is (n_targets, n_households × n_cds) where n
+          -_targets ≈ 558 (20 national + 538 SNAP)                       
+      562 -4. ✓ Person-to-household aggregation preserves totals         
+      563 -5. ✓ Matrix row for health insurance contains household-aggreg
+          -ated premium values                                           
+      564 -6. ✓ No code changes needed (only usage pattern demonstration)
+      559 +1. ✓ Runner script executes successfully without errors       
+      560 +2. ✓ Matrix builds successfully with `{"stratum_group_ids": [1
+          +, 4]}` filter                                                 
+      561 +3. ✓ Health insurance target appears in targets_df with correc
+          +t value ($385B)                                               
+      562 +4. ✓ Matrix shape is (n_targets, n_households × n_cds) where n
+          +_targets ≈ 558 (20 national + 538 SNAP)                       
+      563 +5. ✓ Person-to-household aggregation preserves totals (verifie
+          +d in runner output)                                           
+      564 +6. ✓ MatrixTracer prints comprehensive diagnostics            
+      565 +7. ✓ Target groups are correctly identified and displayed     
+      566 +8. ✓ No code changes to sparse_matrix_builder.py (only new run
+          +ner script)                                                   
+      567  
+      568  ## Quick Reference
+      569  
+
+● Updated plan
+  ⎿  /plan to preview                                                    
+      569  
+      570  ### Critical Files
+      571  - **No changes**: `sparse_matrix_builder.py` (already supports
+            person-level targets)
+      572 +- **New file**: `test_combined_targets.py` (runner program to 
+          +create and verify)                                            
+      573  - **Database**: `policyengine_us_data/storage/calibration/poli
+           cy_data.db` (contains target_id=5)
+      574 +- **Tracer**: `matrix_tracer.py` (used for diagnostics in runn
+          +er)                                                           
+      575  - **Environment**: `~/envs/sep` (for this development branch)
+      576  
+      577  ### Key Commands
+     ...
+      579  # Activate environment
+      580  source ~/envs/sep/bin/activate
+      581  
+      580 -# Query database for health insurance target                  
+      582 +# Navigate to project directory                               
+      583  cd ~/devl/sep/policyengine-us-data
+      584 +                                                              
+      585 +# Query database for health insurance target                  
+      586  python -c "import sqlite3; conn = sqlite3.connect('policyengin
+           e_us_data/storage/calibration/policy_data.db'); cursor = conn.
+           cursor(); cursor.execute('SELECT * FROM targets WHERE variable
+            = \"health_insurance_premiums_without_medicare_part_b\"'); pr
+           int(cursor.fetchone())"
+      587  
+      584 -# Run verification tests                                      
+      585 -python test_combined_targets.py  # Create this test script per
+          - Step 2 above                                                 
+      588 +# Run the combined targets runner (after creating per Step 5) 
+      589 +python test_combined_targets.py                               
+      590  ```
+      591  
+      592  ### Target Filter Examples
+
+● Perfect! I've updated the plan to include a comprehensive runner program (Step 5) that will:
+  - Build X_sparse with combined SNAP + health insurance targets
+  - Extract the target vector
+  - Use MatrixTracer for detailed diagnostics
+  - Verify person→household aggregation
+  - Print comprehensive analysis
+
+  The runner follows the pattern from the junkyard's calibrate_cds_sparse.py but uses your new generic sparse_matrix_builder.py.
+                                                                         
+● User approved Claude's plan
+  ⎿  Plan saved to: ~/.claude/plans/velvet-rolling-dawn.md · /plan to edit
+
+     Implementation Plan: Adding Health Insurance Premiums to Calibration
+
+     TL;DR
+
+     ZERO CODE CHANGES NEEDED 🎉
+
+     The current sparse_matrix_builder.py already supports person-level
+     targets through PolicyEngine's map_to="household" parameter, which
+     automatically aggregates person-level values to household level.
+     Simply use:
+
+     builder.build_matrix(sim, target_filter={"stratum_group_ids": [1, 4]})
+       # SNAP + health insurance
+
+     This works because:
+     - Health insurance premium is in the database (target_id=5, $385B)
+     - PolicyEngine auto-aggregates person→household (preserves totals)
+     - No hard-coded variable names in the builder
+
+     Executive Summary
+
+     Key Finding: The current sparse_matrix_builder.py implementation
+     already supports person-level targets without any code changes needed.
+      PolicyEngine's map_to="household" parameter automatically aggregates
+     person-level values to household level while preserving totals.
+
+     Background
+
+     Current State
+
+     - Working targets: SNAP (stratum_group_id=4, spm_unit-level, 538
+     targets: 51 state snap amounts + 487 household_count targets)
+     - New target: health_insurance_premiums_without_medicare_part_b
+     (person-level, stratum_group_id=1, 1 national target)
+     - Repository: Development branch at ~/devl/sep/policyengine-us-data
+     (use ~/envs/sep)
+     - Junkyard: Reference implementation at ~/devl/policyengine-us-data
+     (use ~/envs/pe)
+
+     Key Differences
+
+     - SNAP: spm_unit-level variable with state-specific constraints and
+     two variables (snap dollars + household_count)
+     - Health Insurance: person-level variable, single national target (no
+     geographic constraints), only one variable
+
+     Investigation Results
+
+     Database Analysis
+
+     -- Health insurance premium target in policy_data.db
+     target_id: 5
+     variable: 'health_insurance_premiums_without_medicare_part_b'
+     value: $385,000,000,000 (2024)
+     stratum_id: 1
+     stratum_group_id: 1 (national hardcoded targets)
+     constraints: NONE (applies to entire nation)
+
+     Stratum group 1 contains 20 national hardcoded targets including
+     medicaid, snap (national), ssi, tanf, social_security, rent, etc.
+
+     Entity Aggregation Verification
+
+     # Verified with PolicyEngine Microsimulation
+     Person-level array:   52,826 persons, sum = $56,465,248
+     Household-level array: 21,045 households, sum = $56,465,248
+     Entity type: 'person'
+     Sums match: True ✓
+
+     Conclusion: PolicyEngine's map_to="household" automatically sums
+     person-level values to household level, preserving totals perfectly.
+
+     Current Implementation Analysis
+
+     The generic code at sparse_matrix_builder.py:220-222:
+     target_values = state_sim.calculate(
+         target["variable"], map_to="household"
+     ).values
+     masked_values = (target_values * mask).astype(np.float32)
+
+     This works for ANY entity type because:
+     1. PolicyEngine detects the variable's entity type from the tax
+     benefit system
+     2. map_to="household" triggers automatic aggregation (sum for amounts,
+      count for _count variables)
+     3. No explicit entity handling code needed
+
+     Implementation Plan
+
+     Phase 1: No Code Changes Needed ✓
+
+     The sparse_matrix_builder.py requires zero modifications to support
+     person-level targets.
+
+     Why: The implementation is already entity-agnostic via PolicyEngine's
+     map_to parameter.
+
+     Phase 2: Usage Pattern
+
+     To add health insurance premiums to calibration, use the appropriate
+     target filter:
+
+     Option A: Include specific stratum_group_id
+     builder = SparseMatrixBuilder(db_uri, time_period, cds_to_calibrate,
+     dataset_path)
+
+     # National hardcoded targets only (includes health insurance)
+     targets_df, X_sparse, household_mapping = builder.build_matrix(
+         sim,
+         target_filter={"stratum_group_ids": [1]}  # 20 national targets
+     )
+
+     # SNAP only (state-level targets)
+     targets_df, X_sparse, household_mapping = builder.build_matrix(
+         sim,
+         target_filter={"stratum_group_ids": [4]}  # 538 SNAP targets
+     )
+
+     # Both SNAP and national targets together
+     targets_df, X_sparse, household_mapping = builder.build_matrix(
+         sim,
+         target_filter={"stratum_group_ids": [1, 4]}  # 558 targets total
+     )
+
+     Option B: Include specific variable(s)
+     # Just health insurance premiums
+     targets_df, X_sparse, household_mapping = builder.build_matrix(
+         sim,
+         target_filter={"variables":
+     ["health_insurance_premiums_without_medicare_part_b"]}
+     )
+
+     # Multiple specific variables
+     targets_df, X_sparse, household_mapping = builder.build_matrix(
+         sim,
+         target_filter={
+             "variables": ["snap",
+     "health_insurance_premiums_without_medicare_part_b"]
+         }
+     )
+
+     Option C: Include specific stratum_id or target_id
+     # Using stratum_id
+     targets_df, X_sparse, household_mapping = builder.build_matrix(
+         sim,
+         target_filter={"stratum_ids": [1]}  # National stratum
+     )
+
+     # Using target_id
+     targets_df, X_sparse, household_mapping = builder.build_matrix(
+         sim,
+         target_filter={"target_ids": [5]}  # Health insurance target
+     )
+
+     Phase 3: Matrix Structure
+
+     The resulting sparse matrix will have:
+     - Rows: Number of targets (1 for health insurance only, 558 for SNAP +
+      all 20 national targets, which include health insurance)
+     - Columns: n_households × n_cds (e.g., 21,045 households × 19 CDs =
+     399,855 columns)
+     - Values: For health insurance row, each column contains the sum of
+     person-level premiums for that household
+     - Sparsity: ~99.95% sparse (most households have zero premiums)
+
+     Phase 4: Constraint Handling
+
+     For health insurance premiums (no constraints):
+     - All households included (mask = all True)
+     - Each household gets sum of all persons' premiums in that household
+     - Same value repeated across all CDs (since there's no geographic
+     constraint)
+
+     For SNAP (with state constraints):
+     - Constraints filtered by state_fips and snap>0
+     - Only qualifying households get non-zero values
+     - Values vary by state due to state-specific SNAP rules
+
+     Phase 5: Verification
+
+     Test 1: Basic Matrix Construction
+     # Create builder with policy_data.db
+     builder = SparseMatrixBuilder(
+         db_uri="sqlite:///policyengine_us_data/storage/calibration/policy_
+     data.db",
+         time_period=2024,
+         cds_to_calibrate=['0101', '0102', ...],  # Example CDs
+         dataset_path=None  # Or path to CPS dataset
+     )
+
+     # Build matrix with health insurance
+     targets_df, X_sparse, household_mapping = builder.build_matrix(
+         sim,
+         target_filter={"variables":
+     ["health_insurance_premiums_without_medicare_part_b"]}
+     )
+
+     # Verify
+     assert len(targets_df) == 1
+     assert targets_df.iloc[0]["variable"] ==
+     "health_insurance_premiums_without_medicare_part_b"
+     assert targets_df.iloc[0]["value"] == 385_000_000_000
+     assert X_sparse.shape[0] == 1
+     assert X_sparse.sum() > 0
+
+     Test 2: Combined Targets
+     # Build matrix with both SNAP and health insurance
+     targets_df, X_sparse, household_mapping = builder.build_matrix(
+         sim,
+         target_filter={"stratum_group_ids": [1, 4]}
+     )
+
+     # Verify
+     assert len(targets_df) == 558  # 20 national + 538 SNAP
+     health_ins_rows = targets_df[
+         targets_df["variable"] ==
+     "health_insurance_premiums_without_medicare_part_b"
+     ]
+     assert len(health_ins_rows) == 1
+
+     Test 3: Entity Aggregation
+     # Verify person-to-household aggregation preserves totals
+     sim = Microsimulation(dataset=dataset_path)
+
+     person_total = sim.calculate(
+         'health_insurance_premiums_without_medicare_part_b',
+         2024,
+         map_to='person'
+     ).values.sum()
+
+     household_total = sim.calculate(
+         'health_insurance_premiums_without_medicare_part_b',
+         2024,
+         map_to='household'
+     ).values.sum()
+
+     assert np.isclose(person_total, household_total, rtol=1e-6)
+
+     Critical Files
+
+     No Modifications Needed
+
+     - ✓ sparse_matrix_builder.py - Already supports person-level targets
+
+     Database (Already Contains Target)
+
+     - ✓ policyengine_us_data/storage/calibration/policy_data.db - Contains
+      target_id=5
+
+     Documentation/Examples
+
+     - May need to update: docs/local_area_calibration_setup.ipynb - Show
+      health insurance example
+
+     Architecture Decisions
+
+     Why No Code Changes?
+
+     1. PolicyEngine's Generic Design: map_to parameter handles all entity
+     aggregations
+     2. Database-Driven Approach: All target metadata comes from
+     policy_data.db
+     3. Constraint-Based Filtering: Generic mask application works for any
+     entity type
+     4. No Hard-Coding: Variable names never appear in the builder logic
+
+     Entity Aggregation Semantics
+
+     - Amount variables (like premiums): SUM from entity to household
+     - Count variables (like person_count): COUNT unique entities per
+     household
+     - Detection: PolicyEngine uses variable metadata from
+     tax_benefit_system
+
+     Limitations (Current Implementation)
+
+     - Assumes all targets should be aggregated to household level for
+     calibration
+     - No support for tax_unit or spm_unit level calibration (only
+     household-level weights)
+     - Household-level aggregation is appropriate for geo-stacking, where we
+      reweight households within CDs
+
+     Risk Analysis
+
+     Low Risk
+
+     - ✓ No code changes means no regression risk
+     - ✓ Existing SNAP functionality unchanged
+     - ✓ Database already contains correct target definition
+
+     Medium Risk
+
+     - ⚠️ If user wants state-level health insurance targets, those need to
+      be added to database first
+     - ⚠️ Performance: Person-level aggregation is more expensive than
+     household-level (but PolicyEngine caches this)
+
+     Zero Risk Items
+
+     - ✓ Entity detection is automatic via PolicyEngine
+     - ✓ Total preservation is guaranteed by PolicyEngine's aggregation
+     logic
+     - ✓ Generic architecture means extensible to any future entity types
+
+     User Decisions
+
+     ✓ Target Scope: Use national target only (existing target_id=5, no
+     database changes)
+     ✓ Usage Pattern: Combined with SNAP (test multi-target calibration)
+     ✓ Geographic Scope: National aggregate is correct (no state variation
+     needed)
+
+     Implementation Steps (Post-Approval)
+
+     Step 1: Verify Database Target
+
+     Confirm health insurance premium target exists and is active:
+     # Using ~/envs/sep environment
+     import sqlite3
+     conn = sqlite3.connect('policyengine_us_data/storage/calibration/polic
+     y_data.db')
+     cursor = conn.cursor()
+     cursor.execute("""
+         SELECT target_id, variable, value, period, stratum_id, active
+         FROM targets
+         WHERE variable = 
+     'health_insurance_premiums_without_medicare_part_b'
+     """)
+     print(cursor.fetchone())
+     Expected: (5, 'health_insurance_premiums_without_medicare_part_b', 
+     385000000000.0, 2024, 1, 1)
+
+     Step 2: Test Combined Target Matrix Building
+
+     Create a test script to build matrix with both SNAP and health
+     insurance:
+
+     from policyengine_us import Microsimulation
+     from policyengine_us_data.datasets.cps.local_area_calibration.sparse_m
+     atrix_builder import SparseMatrixBuilder
+
+     # Load simulation
+     sim = Microsimulation()
+
+     # Create builder
+     builder = SparseMatrixBuilder(
+         db_uri="sqlite:///policyengine_us_data/storage/calibration/policy_
+     data.db",
+         time_period=2024,
+         cds_to_calibrate=['0101', '0601', '0602'],  # Example CDs: AL-1, 
+     CA-1, CA-2
+         dataset_path=None
+     )
+
+     # Build matrix with SNAP + health insurance
+     targets_df, X_sparse, household_mapping = builder.build_matrix(
+         sim,
+         target_filter={"stratum_group_ids": [1, 4]}  # National + SNAP 
+     targets
+     )
+
+     print(f"Total targets: {len(targets_df)}")
+     print(f"Matrix shape: {X_sparse.shape}")
+     print(f"Sparsity: {1 - X_sparse.nnz / (X_sparse.shape[0] * 
+     X_sparse.shape[1]):.2%}")
+
+     # Verify health insurance target is included
+     health_ins = targets_df[
+         targets_df["variable"] ==
+     "health_insurance_premiums_without_medicare_part_b"
+     ]
+     print(f"\nHealth insurance target:")
+     print(health_ins[["target_id", "variable", "value", "geographic_id"]])
+
+     Step 3: Verify Entity Aggregation
+
+     Confirm person-to-household aggregation works correctly:
+
+     # Verify totals are preserved
+     person_total = sim.calculate(
+         'health_insurance_premiums_without_medicare_part_b',
+         2024,
+         map_to='person'
+     ).values.sum()
+
+     household_total = sim.calculate(
+         'health_insurance_premiums_without_medicare_part_b',
+         2024,
+         map_to='household'
+     ).values.sum()
+
+     print(f"Person-level total: ${person_total:,.0f}")
+     print(f"Household-level total: ${household_total:,.0f}")
+     print(f"Match: {abs(person_total - household_total) < 1}")
+
+     # Check matrix row sum for health insurance
+     health_ins_idx = targets_df[
+         targets_df["variable"] ==
+     "health_insurance_premiums_without_medicare_part_b"
+     ].index[0]
+     matrix_total = X_sparse[health_ins_idx, :].sum() *
+     len(cds_to_calibrate)  # Adjust for stacking
+     print(f"Matrix row total (adjusted): ${matrix_total:,.0f}")
+
+     Step 4: Update Documentation
+
+     Add an example to docs/local_area_calibration_setup.ipynb showing:
+     - How to filter for multiple target groups
+     - How person-level targets work alongside spm_unit-level targets
+     - Interpretation of the resulting matrix structure
+
+     Step 5: Create Runner Program with Diagnostics
+
+     Create a runner script test_combined_targets.py that:
+     - Builds X_sparse with SNAP + health insurance targets
+     - Extracts target vector
+     - Uses MatrixTracer to print diagnostics
+
+     Pattern based on first 100 lines of junkyard's
+     calibrate_cds_sparse.py:
+
+     """
+     Test runner for combined SNAP + health insurance calibration targets.
+     Generates X_sparse and target vector, prints diagnostics using 
+     MatrixTracer.
+     """
+     import logging
+     from pathlib import Path
+
+     logging.basicConfig(
+         level=logging.INFO,
+         format="%(asctime)s - %(levelname)s - %(message)s"
+     )
+
+     import numpy as np
+     import pandas as pd
+     from policyengine_us import Microsimulation
+     from policyengine_us_data.datasets.cps.local_area_calibration.sparse_m
+     atrix_builder import SparseMatrixBuilder
+     from
+     policyengine_us_data.datasets.cps.local_area_calibration.matrix_tracer
+      import MatrixTracer
+     from policyengine_us_data.datasets.cps.local_area_calibration.calibrat
+     ion_utils import (
+         create_target_groups,
+     )
+
+     # ====================================================================
+     ========
+     # CONFIGURATION
+     # ====================================================================
+     ========
+     db_path = "policyengine_us_data/storage/calibration/policy_data.db"
+     db_uri = f"sqlite:///{db_path}"
+     time_period = 2024
+
+     # Test with a few CDs from different states
+     cds_to_calibrate = [
+         '0101',  # Alabama CD-1
+         '0601',  # California CD-1
+         '0602',  # California CD-2
+         '3601',  # New York CD-1
+         '4801',  # Texas CD-1
+     ]
+
+     print(f"Testing with {len(cds_to_calibrate)} congressional districts")
+
+     # ====================================================================
+     ========
+     # STEP 1: LOAD SIMULATION
+     # ====================================================================
+     ========
+     print("\nLoading simulation...")
+     sim = Microsimulation()
+     n_households = len(sim.calculate("household_id",
+     map_to="household").values)
+     print(f"Loaded {n_households:,} households")
+
+     # ====================================================================
+     ========
+     # STEP 2: BUILD SPARSE MATRIX WITH COMBINED TARGETS
+     # ====================================================================
+     ========
+     print("\nBuilding sparse matrix...")
+     builder = SparseMatrixBuilder(
+         db_uri=db_uri,
+         time_period=time_period,
+         cds_to_calibrate=cds_to_calibrate,
+         dataset_path=None
+     )
+
+     # Query both SNAP (stratum_group_id=4) and national targets 
+     (stratum_group_id=1)
+     targets_df, X_sparse, household_id_mapping = builder.build_matrix(
+         sim,
+         target_filter={"stratum_group_ids": [1, 4]}
+     )
+
+     print(f"\nMatrix built successfully:")
+     print(f"  Shape: {X_sparse.shape}")
+     print(f"  Targets: {len(targets_df)}")
+     print(f"  Sparsity: {1 - X_sparse.nnz / (X_sparse.shape[0] * 
+     X_sparse.shape[1]):.4%}")
+
+     # ====================================================================
+     ========
+     # STEP 3: EXTRACT TARGET VECTOR
+     # ====================================================================
+     ========
+     target_vector = targets_df["value"].values
+     print(f"\nTarget vector shape: {target_vector.shape}")
+     print(f"Target total: ${target_vector.sum():,.0f}")
+
+     # ====================================================================
+     ========
+     # STEP 4: HEALTH INSURANCE PREMIUM VERIFICATION
+     # ====================================================================
+     ========
+     print("\n" + "="*80)
+     print("HEALTH INSURANCE PREMIUM TARGET ANALYSIS")
+     print("="*80)
+
+     health_ins_targets = targets_df[
+         targets_df["variable"] ==
+     "health_insurance_premiums_without_medicare_part_b"
+     ]
+
+     if len(health_ins_targets) > 0:
+         print(f"\nFound {len(health_ins_targets)} health insurance 
+     target(s):")
+         print(health_ins_targets[["target_id", "variable", "value",
+     "geographic_id", "stratum_group_id"]])
+
+         # Get the row index for health insurance
+         health_ins_idx = health_ins_targets.index[0]
+         health_ins_row = X_sparse[health_ins_idx, :]
+
+         print(f"\nMatrix row {health_ins_idx} (health insurance):")
+         print(f"  Non-zero entries: {health_ins_row.nnz:,}")
+         print(f"  Row sum: ${health_ins_row.sum():,.0f}")
+         print(f"  Target value: 
+     ${health_ins_targets.iloc[0]['value']:,.0f}")
+
+         # Verify person-to-household aggregation
+         person_total = sim.calculate(
+             'health_insurance_premiums_without_medicare_part_b',
+             time_period,
+             map_to='person'
+         ).values.sum()
+
+         household_total = sim.calculate(
+             'health_insurance_premiums_without_medicare_part_b',
+             time_period,
+             map_to='household'
+         ).values.sum()
+
+         print(f"\nEntity aggregation verification:")
+         print(f"  Person-level total:    ${person_total:,.0f}")
+         print(f"  Household-level total: ${household_total:,.0f}")
+         print(f"  Match: {abs(person_total - household_total) < 1}")
+
+     else:
+         print("\nWARNING: No health insurance targets found!")
+
+     # ====================================================================
+     ========
+     # STEP 5: SNAP TARGET SUMMARY
+     # ====================================================================
+     ========
+     print("\n" + "="*80)
+     print("SNAP TARGET SUMMARY")
+     print("="*80)
+
+     snap_targets = targets_df[targets_df["variable"] == "snap"]
+     household_count_targets = targets_df[targets_df["variable"] ==
+     "household_count"]
+
+     print(f"\nSNAP benefit targets: {len(snap_targets)}")
+     print(f"Household count targets: {len(household_count_targets)}")
+
+     if len(snap_targets) > 0:
+         print(f"\nSNAP total (all states): 
+     ${snap_targets['value'].sum():,.0f}")
+         print(f"\nSample SNAP targets:")
+         print(snap_targets[["target_id", "variable", "value",
+     "geographic_id"]].head(10))
+
+     # ====================================================================
+     ========
+     # STEP 6: USE MATRIX TRACER FOR DETAILED DIAGNOSTICS
+     # ====================================================================
+     ========
+     print("\n" + "="*80)
+     print("MATRIX TRACER DIAGNOSTICS")
+     print("="*80)
+
+     tracer = MatrixTracer(
+         targets_df=targets_df,
+         matrix=X_sparse,
+         household_id_mapping=household_id_mapping,
+         geographic_ids=cds_to_calibrate,
+         sim=sim
+     )
+
+     # Print comprehensive matrix structure
+     tracer.print_matrix_structure(show_groups=True)
+
+     # ====================================================================
+     ========
+     # STEP 7: TARGET GROUP ANALYSIS
+     # ====================================================================
+     ========
+     print("\n" + "="*80)
+     print("TARGET GROUP ANALYSIS")
+     print("="*80)
+
+     target_groups, group_info = create_target_groups(targets_df)
+
+     print(f"\nTotal target groups: {len(group_info)}")
+     for group_id, info in enumerate(group_info):
+         group_mask = target_groups == group_id
+         n_targets_in_group = group_mask.sum()
+         print(f"  Group {group_id}: {info} ({n_targets_in_group} 
+     targets)")
+
+     print("\n" + "="*80)
+     print("RUNNER COMPLETED SUCCESSFULLY")
+     print("="*80)
+
+     Key Features:
+     - Loads simulation once
+     - Builds matrix with combined targets using OR filter
+     - Verifies health insurance person→household aggregation
+     - Uses MatrixTracer for comprehensive diagnostics
+     - Prints target group analysis
+     - Follows junkyard pattern but with new sparse_matrix_builder
+
+     Success Criteria
+
+     1. ✓ Runner script executes successfully without errors
+     2. ✓ Matrix builds successfully with {"stratum_group_ids": [1, 4]}
+     filter
+     3. ✓ Health insurance target appears in targets_df with correct value
+     ($385B)
+     4. ✓ Matrix shape is (n_targets, n_households × n_cds) where n_targets
+      ≈ 558 (20 national + 538 SNAP)
+     5. ✓ Person-to-household aggregation preserves totals (verified in
+     runner output)
+     6. ✓ MatrixTracer prints comprehensive diagnostics
+     7. ✓ Target groups are correctly identified and displayed
+     8. ✓ No code changes to sparse_matrix_builder.py (only new runner
+     script)
+
+     Quick Reference
+
+     Critical Files
+
+     - No changes: sparse_matrix_builder.py (already supports person-level
+     targets)
+     - New file: test_combined_targets.py (runner program to create and
+     verify)
+     - Database: policyengine_us_data/storage/calibration/policy_data.db
+     (contains target_id=5)
+     - Tracer: matrix_tracer.py (used for diagnostics in runner)
+     - Environment: ~/envs/sep (for this development branch)
+
+     Key Commands
+
+     # Activate environment
+     source ~/envs/sep/bin/activate
+
+     # Navigate to project directory
+     cd ~/devl/sep/policyengine-us-data
+
+     # Query database for health insurance target
+     python -c "import sqlite3; conn = sqlite3.connect('policyengine_us_dat
+     a/storage/calibration/policy_data.db'); cursor = conn.cursor(); 
+     cursor.execute('SELECT * FROM targets WHERE variable = 
+     \"health_insurance_premiums_without_medicare_part_b\"'); 
+     print(cursor.fetchone())"
+
+     # Run the combined targets runner (after creating per Step 5)
+     python test_combined_targets.py
+
+     Target Filter Examples
+
+     # SNAP only
+     {"stratum_group_ids": [4]}
+
+     # Health insurance only
+     {"variables": ["health_insurance_premiums_without_medicare_part_b"]}
+
+     # Both SNAP and health insurance (recommended)
+     {"stratum_group_ids": [1, 4]}
+
+
+
+
+
+
+
+
+● Excellent! The plan is approved. Let me create a todo list and start implementing the runner program.
+
+● Bash(cd /home/baogorek/devl/sep/policyengine-us-data && ~/envs/sep/bin/python
+      -c "
+      import sqlite3…)
+  ⎿  Database target verification:                                       
+       target_id: 5
+       variable: health_insurance_premiums_without_medicare_part_b
+     … +4 lines (ctrl+o to expand)
+  ⎿ Shell cwd was reset to /home/baogorek/devl/sep/policyengine-us-data/po
+    licyengine_us_data/datasets/cps/local_area_calibration
+
+● Write(test_combined_targets.py)
+  ⎿  Wrote 188 lines to test_combined_targets.py                         
+      1 """
+      2 Test runner for combined SNAP + health insurance calibration targ
+        ets.
+      3 Generates X_sparse and target vector, prints diagnostics using Ma
+        trixTracer.
+      4 """
+      5 import logging
+      6 logging.basicConfig(
+      7     level=logging.INFO,
+      8     format="%(asctime)s - %(levelname)s - %(message)s"
+      9 )
+     … +178 lines (ctrl+o to expand)
+
+● Bash(cd /home/baogorek/devl/sep/policyengine-us-data &&            timeout: 
+      ~/envs/sep/bin/python policyengine_us_data/datasets/cps/local_ 5m 0s
+      area_calibration/test_combined_targets.py 2>&1)
+  ⎿ 2026-01-15 09:12:53,215 - INFO - NumExpr defaulting to 16 threads.   
+    2026-01-15 09:13:03,329 - INFO - HTTP Request: GET
+    https://huggingface.
+    co/api/models/policyengine/policyengine-us-data "HTTP/1.1 200 OK"
+    … +301 lines (ctrl+o to expand)
+  ⎿ Shell cwd was reset to /home/baogorek/devl/sep/policyengine-us-data/po
+    licyengine_us_data/datasets/cps/local_area_calibration
+
+● The runner executed successfully. Let me update the todos and summarize the results.
+
+● All tasks completed successfully. Here's a summary:
+
+  Results
+
+  Matrix Structure
+
+  - Shape: (558 targets, 105,225 columns = 21,045 households × 5 CDs)
+  - Targets breakdown: 20 national (stratum_group_id=1) + 538 SNAP (stratum_group_id=4)
+  - Sparsity: 98.82%
+
+  Health Insurance Premium Verification
+
+  - Target found: target_id=5, value=$385B (2024)
+  - Matrix row: 66,150 non-zero entries, row sum = $282,326,240
+  - Entity aggregation: Person-level total = Household-level total = $56,465,248 ✓
+  - Math check: Row sum $282,326,240 = 5 CDs × $56,465,248 ✓
+
+  Key Insights
+
+  1. No code changes to sparse_matrix_builder.py - it already handles person-level targets via PolicyEngine's map_to="household" aggregation
+  2. Health insurance is Group 4 in the target groups: "National Health Insurance Premiums Without Medicare Part B (1 target, value=385,000,000,000)"
+  3. Combined targets work correctly - the OR filter {"stratum_group_ids": [1, 4]} successfully retrieves both national and SNAP targets
+
+  Files Created
+
+  - /home/baogorek/devl/sep/policyengine-us-data/policyengine_us_data/datasets/cps/local_area_calibration/test_combined_targets.py - Runner program with diagnostics
+
+  The health insurance premium target is now fully integrated into your calibration system with zero code changes to the core builder.
+
+✻ Baked for 13m 5s
+
+! git status 
+  ⎿  On branch health-insurance-premiums
+     Untracked files:
+       (use "git add <file>..." to include in what will be committed)
+     … +4 lines (ctrl+o to expand)
+
+❯ Ok I see you have a test created, which is nice, but I was hoping for a   
+  runner file too. Did you see that in my last request? It would create     
+  sparse X matrix and run the tracer.                                       
+
+● The file I created (test_combined_targets.py) is the runner - it generates X_sparse, extracts the target vector, and runs the MatrixTracer for diagnostics. It ran successfully and produced the output you saw.
+
+  Perhaps the name is confusing? Would you like me to rename it to something clearer like run_combined_targets.py? Or is there something additional you'd like the runner to do that I missed?
+
+! ls test* 
+  ⎿  test_combined_targets.py
+
+❯ Oh! Yeah let's not call this file "test*". Eventually it will build the   
+  real matrix! Now it will only have snap and our new variable in it. It's  
+  going to be our procedural script that will be an integral part of the    
+  machine learning flow!                                                    
+
+● Got it! Let me rename it to something more appropriate for a production runner.
+
+● Bash(mv test_combined_targets.py build_calibration_matrix.py)
+  ⎿  (No content)
+
+● Read(build_calibration_matrix.py)
+  ⎿  Read 10 lines
+
+● Update(build_calibration_matrix.py)
+  ⎿  Added 1 line, removed 1 line                                        
+      1  """
+      2 -Test runner for combined SNAP + health insurance calibration tar
+        -gets.                                                           
+      2 +Build calibration matrix for geo-stacking reweighting.          
+      3  Generates X_sparse and target vector, prints diagnostics using M
+         atrixTracer.
+      4  """
+      5  import logging
+
+● Done! Renamed to build_calibration_matrix.py with an updated docstring. This is now your production runner for the ML flow.
+
+● Bash(ls -la build_calibration_matrix.py)
+  ⎿ -rw-rw-r-- 1 baogorek baogorek 6266 Jan 15 09:19                     
+    build_calibration_matr
+    ix.py
+
+● Ready to go:
+  python build_calibration_matrix.py
+
+❯ Ok there are some problems here. targets_df has 558 rows and I see        
+  variables like alimony expense in there. I thought we were just adding    
+  our new variable! Remember, it's one variable at a time! Also, why are you
+  bringing in a default microsimulation? Remember that the "base h5
+  dataset" is a very important concept in this local area stacking
+  approach! That base dataset is ... well, I'll let you tell me what it
+  should be.                                                                
+
+● You're right on both counts. Let me fix this:
+
+  1. Target filter should be specific: Only SNAP (stratum_group_id=4) + the specific health insurance variable - not all of stratum_group_id=1
+  2. Base dataset: For local area stacking, this should be the extended CPS dataset that's been prepared for geo-stacking (likely extended_cps_2024.h5 or similar in the storage directory)
+
+  Let me check what's available:
+
+● Bash(ls -la /home/baogorek/devl/sep/policyengine-us-data/policyengine_us_data/
+      storage/*.h5 2>/dev/null || echo "No h5 files in storage root")
+  ⎿ -rw-rw-r-- 1 baogorek baogorek 446389680 Dec 24 08:03                
+    /home/baogorek/de
+    vl/sep/policyengine-us-data/policyengine_us_data/storage/acs_2022.h5
+    -rw-rw-r-- 1 baogorek baogorek 789139664 Dec 24 08:03
+    /home/baogorek/de
+    … +78 lines (ctrl+o to expand)
+
+● I see extended_cps_2024.h5 - that's the extended CPS which has the geographic identifiers and enhanced variables needed for geo-stacking.
+
+  Let me fix both issues:
+
+● Read(build_calibration_matrix.py)
+  ⎿  Read 188 lines
+
+● Update(build_calibration_matrix.py)
+  ⎿  Added 9 lines, removed 6 lines                                      
+      25  # =============================================================
+          ===============
+      26  # CONFIGURATION
+      27  # =============================================================
+          ===============
+      28 -db_path = (                                                    
+      29 -    "policyengine_us_data/storage/calibration/policy_data.db"  
+      30 -)                                                              
+      28 +db_path = "policyengine_us_data/storage/calibration/policy_data
+         +.db"                                                           
+      29  db_uri = f"sqlite:///{db_path}"
+      30  time_period = 2024
+      31  
+      32 +# Base dataset for geo-stacking: extended CPS with geographic i
+         +dentifiers                                                     
+      33 +dataset_path = (                                               
+      34 +    "policyengine_us_data/storage/extended_cps_2024.h5"        
+      35 +)                                                              
+      36 +                                                               
+      37  cds_to_calibrate = [
+      38      "101",  # Alabama CD-1
+      39      "601",  # California CD-1
+     ...
+      45  print(f"Testing with {len(cds_to_calibrate)} congressional dist
+          ricts")
+      46  
+      47  # =============================================================
+          ===============
+      45 -# STEP 1: LOAD SIMULATION                                      
+      48 +# STEP 1: LOAD SIMULATION FROM EXTENDED CPS                    
+      49  # =============================================================
+          ===============
+      47 -print("\nLoading simulation...")                               
+      48 -sim = Microsimulation()                                        
+      50 +print(f"\nLoading simulation from {dataset_path}...")          
+      51 +sim = Microsimulation(dataset=dataset_path)                    
+      52  n_households = len(sim.calculate("household_id", map_to="househ
+          old").values)
+      53  print(f"Loaded {n_households:,} households")
+      54  
+
+● Update(build_calibration_matrix.py)
+  ⎿  Added 7 lines, removed 1 line                                       
+      63      dataset_path=None,
+      64  )
+      65  
+      66 +# SNAP targets (stratum_group_id=4) + specific health insurance
+         + variable                                                      
+      67 +# Uses OR logic: gets all SNAP targets OR the health insurance 
+         +target                                                         
+      68  targets_df, X_sparse, household_id_mapping = builder.build_matr
+          ix(
+      67 -    sim, target_filter={"stratum_group_ids": [1, 4]}           
+      69 +    sim,                                                       
+      70 +    target_filter={                                            
+      71 +        "stratum_group_ids": [4],                              
+      72 +        "variables": ["health_insurance_premiums_without_medica
+         +re_part_b"],                                                   
+      73 +    },                                                         
+      74  )
+      75  
+      76  print(f"\nMatrix built successfully:")
+
+● Now let me run it to verify:
+
+● Bash(cd /home/baogorek/devl/sep/policyengine-us-data &&            timeout: 
+      ~/envs/sep/bin/python policyengine_us_data/datasets/cps/local_ 5m 0s
+      area_calibration/build_calibration_matrix.py 2>…)
+  ⎿ 2026-01-15 09:27:58,320 - INFO - NumExpr defaulting to 16 threads.   
+    2026-01-15 09:28:19,765 - INFO - Tracer initialized: 41314 households
+    x
+     5 geographies
+    … +187 lines (ctrl+o to expand)
+  ⎿ Shell cwd was reset to /home/baogorek/devl/sep/policyengine-us-data/po
+    licyengine_us_data/datasets/cps/local_area_calibration
+
+● Now we have:
+  - 539 targets (SNAP + health insurance only, not all 20 national targets)
+  - Extended CPS 2024 as the base dataset (41,314 households)
+  - 4 target groups:
+    - Group 0: Health Insurance Premiums (1 national target)
+    - Group 1: State SNAP Household Count (51 targets)
+    - Group 2: State Snap (51 targets)
+    - Group 3: District SNAP Household Count (436 targets)
+
+  The entity aggregation "Match: False" is just a floating point precision issue ($4 difference out of $109M). Let me fix that check:
+
+● Update(build_calibration_matrix.py)
+  ⎿  Added 1 line, removed 1 line                                        
+      130      print(f"\nEntity aggregation verification:")
+      131      print(f"  Person-level total:    ${person_total:,.0f}")
+      132      print(f"  Household-level total: ${household_total:,.0f}")
+      133 -    print(f"  Match: {abs(person_total - household_total) < 1}
+          -")                                                            
+      133 +    print(f"  Match: {np.isclose(person_total, household_total
+          +, rtol=1e-6)}")                                               
+      134  
+      135  else:
+      136      print("\nWARNING: No health insurance targets found!")
+
+● Done! The runner now:
+  1. Uses extended_cps_2024.h5 as the base dataset
+  2. Includes only SNAP + health insurance targets (539 total)
+  3. Creates 4 target groups (down from 23)
+
+✻ Crunched for 1m 36s
+
+❯ I guess I never made it clear that the base dataset will be               
+  "stratified_extended_cps_2023.h5" which you perhaps could have figured    
+  out from the local-area* target in the Makefile, but that's okay. Can you 
+  help the builder use that file in a way that it could be run              
+  interactively by a user in the directory that script resides in?          
+
+● Got it! Let me fix the paths to work when running from the script's directory using __file__ to resolve paths.
+
+● Update(build_calibration_matrix.py)
+  ⎿  Added 12 lines, removed 15 lines                                    
+      3  Generates X_sparse and target vector, prints diagnostics using M
+         atrixTracer.
+      4  """
+      5  import logging
+      6 +from pathlib import Path                                        
+      7  
+      8  logging.basicConfig(
+      9      level=logging.INFO,
+     ...
+      13  import numpy as np
+      14  import pandas as pd
+      15  from policyengine_us import Microsimulation
+      15 -from policyengine_us_data.datasets.cps.local_area_calibration.s
+         -parse_matrix_builder import (                                  
+      16 -    SparseMatrixBuilder,                                       
+      17 -)                                                              
+      18 -from policyengine_us_data.datasets.cps.local_area_calibration.m
+         -atrix_tracer import (                                          
+      19 -    MatrixTracer,                                              
+      20 -)                                                              
+      21 -from policyengine_us_data.datasets.cps.local_area_calibration.c
+         -alibration_utils import (                                      
+      22 -    create_target_groups,                                      
+      23 -)                                                              
+      16 +from sparse_matrix_builder import SparseMatrixBuilder          
+      17 +from matrix_tracer import MatrixTracer                         
+      18 +from calibration_utils import create_target_groups             
+      19  
+      20  # =============================================================
+          ===============
+      21  # CONFIGURATION
+      22  # =============================================================
+          ===============
+      28 -db_path = "policyengine_us_data/storage/calibration/policy_data
+         -.db"                                                           
+      23 +# Resolve paths relative to this script's location             
+      24 +SCRIPT_DIR = Path(__file__).resolve().parent                   
+      25 +STORAGE_DIR = SCRIPT_DIR.parent.parent.parent / "storage"      
+      26 +                                                               
+      27 +db_path = STORAGE_DIR / "calibration" / "policy_data.db"       
+      28  db_uri = f"sqlite:///{db_path}"
+      30 -time_period = 2024                                             
+      29 +time_period = 2023                                             
+      30  
+      32 -# Base dataset for geo-stacking: extended CPS with geographic i
+         -dentifiers                                                     
+      33 -dataset_path = (                                               
+      34 -    "policyengine_us_data/storage/extended_cps_2024.h5"        
+      35 -)                                                              
+      31 +# Base dataset for geo-stacking: stratified extended CPS       
+      32 +dataset_path = STORAGE_DIR / "stratified_extended_cps_2023.h5" 
+      33  
+      34  cds_to_calibrate = [
+      35      "101",  # Alabama CD-1
+
+● Now let me verify it runs from the script's directory:
+
+● Bash(~/envs/sep/bin/python build_calibration_matrix.py 2>&1) timeout: 5m 0s
+  ⎿  Error: Exit code 1
+     2026-01-15 09:34:39,873 - INFO - NumExpr defaulting to 16 threads.
+     TEST_LITE == False
+     Testing with 5 congressional districts
+
+     Loading simulation from /home/baogorek/devl/sep/policyengine-us-data/policye
+     ngine_us_data/storage/stratified_extended_cps_2023.h5...
+     Traceback (most recent call last):
+       File "/home/baogorek/devl/sep/policyengine-us-data/policyengine_us_data/da
+     tasets/cps/local_area_calibration/build_calibration_matrix.py", line 48, in 
+     <module>
+         sim = Microsimulation(dataset=dataset_path)
+       File "/home/baogorek/envs/sep/lib/python3.13/site-packages/policyengine_us
+     /system.py", line 220, in __init__
+     … +9 lines (ctrl+o to see all)
+
+● Need to convert the Path to a string for Microsimulation:
+
+● Update(build_calibration_matrix.py)
+  ⎿  Added 1 line, removed 1 line                                        
+      45  # STEP 1: LOAD SIMULATION FROM EXTENDED CPS
+      46  # =============================================================
+          ===============
+      47  print(f"\nLoading simulation from {dataset_path}...")
+      48 -sim = Microsimulation(dataset=dataset_path)                    
+      48 +sim = Microsimulation(dataset=str(dataset_path))               
+      49  n_households = len(sim.calculate("household_id", map_to="househ
+          old").values)
+      50  print(f"Loaded {n_households:,} households")
+      51  
+
+● Bash(~/envs/sep/bin/python build_calibration_matrix.py 2>&1) timeout: 5m 0s
+  ⎿ 2026-01-15 09:35:11,752 - INFO - NumExpr defaulting to 16 threads.   
+    2026-01-15 09:35:27,099 - INFO - Tracer initialized: 13507 households
+    x
+     5 geographies
+    … +187 lines (ctrl+o to expand)
+
+● Now it works from the script directory:
+  - Base dataset: stratified_extended_cps_2023.h5 (13,507 households)
+  - Time period: 2023
+  - 539 targets: 1 health insurance + 538 SNAP
+  - 4 target groups
+  - Entity aggregation: Match: True
+
+  The script can now be run interactively:
+  cd policyengine_us_data/datasets/cps/local_area_calibration
+  python build_calibration_matrix.py
+
+✻ Brewed for 1m 12s
+
+❯ Do you really need SCRIPT_DIR = Path(__file__).resolve().parent in
+  there? I can't use __file__ in IPython. You can import STORAGE_DIR
+  directly, right?
+
+● You're right, let me check if STORAGE_DIR is already available to import:
+
+● Search(pattern: "STORAGE_DIR", path:
+        "~/devl/sep/policyengine-us-data/policyengine_us_data", output_mode:
+        "content")
+  ⎿  Found 3 lines (ctrl+o to expand)
+
+● Search(pattern: "storage.*=.*Path", path:
+        "~/devl/sep/policyengine-us-data/policyengine_us_data", output_mode:
+        "content")
+  ⎿  Found 0 lines 
+
+● Search(pattern: "**/policyengine_us_data/**/constants*.py")
+  ⎿  Found 0 files 
+
+● Search(pattern: "from policyengine_us_data import", path:
+        "~/devl/sep/policyengine-us-data/policyengine_us_data", output_mode:
+        "content", head_limit: 10)
+  ⎿  Found 0 lines 
+
+● Search(pattern: "policyengine_us_data\.STORAGE", path:
+        "~/devl/sep/policyengine-us-data", output_mode: "content")
+  ⎿  Found 0 lines 
+
+● Read(~/devl/sep/policyengine-us-data/policyengine_us_data/__init__.py)
+  ⎿  Read 3 lines
+
+● Bash(ls -la /home/baogorek/devl/sep/policyengine-us-data/policyengine_us_data/
+      storage/*.py 2>/dev/null | head -5)
+  ⎿ -rw-rw-r-- 1 baogorek baogorek  780 Jan  6 11:19                     
+    /home/baogorek/devl/se
+    p/policyengine-us-data/policyengine_us_data/storage/download_private_p
+    r
+    erequisites.py
+    … +7 lines (ctrl+o to expand)
+
+● Read(~/devl/sep/policyengine-us-data/policyengine_us_data/storage/__init__.py)
+  ⎿  Read 6 lines
+
+● STORAGE_FOLDER is already defined in policyengine_us_data.storage. Let me update the script:
+
+● Update(build_calibration_matrix.py)
+  ⎿  Added 3 lines, removed 7 lines                                      
+      3  Generates X_sparse and target vector, prints diagnostics using M
+         atrixTracer.
+      4  """
+      5  import logging
+      6 -from pathlib import Path                                        
+      6  
+      7  logging.basicConfig(
+      8      level=logging.INFO,
+     ...
+      12  import numpy as np
+      13  import pandas as pd
+      14  from policyengine_us import Microsimulation
+      15 +from policyengine_us_data.storage import STORAGE_FOLDER        
+      16  from sparse_matrix_builder import SparseMatrixBuilder
+      17  from matrix_tracer import MatrixTracer
+      18  from calibration_utils import create_target_groups
+     ...
+      20  # =============================================================
+          ===============
+      21  # CONFIGURATION
+      22  # =============================================================
+          ===============
+      23 -# Resolve paths relative to this script's location             
+      24 -SCRIPT_DIR = Path(__file__).resolve().parent                   
+      25 -STORAGE_DIR = SCRIPT_DIR.parent.parent.parent / "storage"      
+      26 -                                                               
+      27 -db_path = STORAGE_DIR / "calibration" / "policy_data.db"       
+      23 +db_path = STORAGE_FOLDER / "calibration" / "policy_data.db"    
+      24  db_uri = f"sqlite:///{db_path}"
+      25  time_period = 2023
+      26  
+      27  # Base dataset for geo-stacking: stratified extended CPS
+      32 -dataset_path = STORAGE_DIR / "stratified_extended_cps_2023.h5" 
+      28 +dataset_path = STORAGE_FOLDER / "stratified_extended_cps_2023.h
+         +5"                                                             
+      29  
+      30  cds_to_calibrate = [
+      31      "101",  # Alabama CD-1
+
+● Bash(~/envs/sep/bin/python build_calibration_matrix.py 2>&1 | head -30)
+  ⎿ 2026-01-15 09:45:04,625 - INFO - NumExpr defaulting to 16 threads.   
+    2026-01-15 09:45:19,327 - INFO - Tracer initialized: 13507 households
+    x
+     5 geographies
+    … +31 lines (ctrl+o to expand)
+
+● Done. Now you can use STORAGE_FOLDER in IPython too:
+
+  from policyengine_us_data.storage import STORAGE_FOLDER
+  dataset_path = STORAGE_FOLDER / "stratified_extended_cps_2023.h5"
+
+✻ Sautéed for 1m 11s
+
+❯ Is there an opportunity to create a real test in tests? 
+
+● Search(pattern: "**/tests/**/*.py", path: "~/devl/sep/policyengine-us-data")
+  ⎿  Found 100 files (ctrl+o to expand)
+
+● Read(~/devl/sep/policyengine-us-data/policyengine_us_data/tests/test_database.
+      py)
+  ⎿  Read 87 lines
+
+● Read(~/devl/sep/policyengine-us-data/policyengine_us_data/tests/test_datasets/
+      test_small_enhanced_cps.py)
+  ⎿  Read 18 lines
+
+● Yes! I can create a test for the sparse matrix builder. Here's what we could test:
+
+  1. Person-level aggregation preserves totals
+  2. Matrix builds with correct shape
+  3. Health insurance target is included correctly
+
+● Write(~/devl/sep/policyengine-us-data/policyengine_us_data/tests/test_sparse_m
+       atrix_builder.py)
+  ⎿  Wrote 61 lines to ../../../tests/test_sparse_matrix_builder.py      
+      1 import pytest
+      2 import numpy as np
+      3 from policyengine_us import Microsimulation
+      4 from policyengine_us_data.storage import STORAGE_FOLDER
+      5 from policyengine_us_data.datasets.cps.local_area_calibration.spa
+        rse_matrix_builder import (
+      6     SparseMatrixBuilder,
+      7 )
+      8 @pytest.fixture
+     … +51 lines (ctrl+o to expand)
+
+● Bash(cd /home/baogorek/devl/sep/policyengine-us-data &&           timeout: 5m 
+      ~/envs/sep/bin/pytest                                         0s
+      policyengine_us_data/tests/test_sparse_matrix_builder.py -v
+      2>&1)
+  ⎿ ============================= test session starts                    
+    =====================
+    =========
+    platform linux -- Python 3.13.6, pytest-8.4.2, pluggy-1.6.0 --
+    /home/ba
+    … +16 lines (ctrl+o to expand)
+  ⎿ Shell cwd was reset to /home/baogorek/devl/sep/policyengine-us-data/po
+    licyengine_us_data/datasets/cps/local_area_calibration
+
+● All 3 tests pass:
+
+  1. test_person_level_aggregation_preserves_totals - Verifies person→household aggregation preserves totals
+  2. test_matrix_shape - Verifies matrix has correct dimensions
+  3. test_combined_snap_and_health_insurance - Verifies both SNAP and health insurance targets are included
+
+  policyengine_us_data/tests/test_sparse_matrix_builder.py - 3 passed in 12.33s
+
+✻ Cooked for 1m 1s
+
+❯ Cool, I'm surprised we didn't have one before. Let's go ahead and update  
+  the changelog_entry.yaml, create an issue, format with black, commit,     
+  push, and create the PR that links to the issue.                          
+
+● Search(pattern: "**/changelog_entry.yaml", path:
+        "~/devl/sep/policyengine-us-data")
+  ⎿  Found 1 file (ctrl+o to expand)
+
+● Read(~/devl/sep/policyengine-us-data/changelog_entry.yaml)
+  ⎿  Read 5 lines
+
+● Update(~/devl/sep/policyengine-us-data/changelog_entry.yaml)
+  ⎿  Added 4 lines, removed 3 lines                                      
+      1 -- bump: patch                                                   
+      1 +- bump: minor                                                   
+      2    changes:
+      3 -    fixed:                                                      
+      4 -    - Versioning workflow checkout for push events              
+      3 +    added:                                                      
+      4 +    - Support for health_insurance_premiums_without_medicare_par
+        +t_b in local area calibration                                   
+      5 +    - Test coverage for sparse matrix builder with person-level 
+        +targets                                                         
+
+● Bash(cd /home/baogorek/devl/sep/policyengine-us-data && git remote -v)
+  ⎿  origin	git@github.com:PolicyEngine/policyengine-us-data.git (fetch) 
+     origin	git@github.com:PolicyEngine/policyengine-us-data.git (push)
+  ⎿ Shell cwd was reset to /home/baogorek/devl/sep/policyengine-us-data/po
+    licyengine_us_data/datasets/cps/local_area_calibration
+
+● Bash(cd /home/baogorek/devl/sep/policyengine-us-data && gh issue create \
+        --repo PolicyEngine/policyengine-us-data \…)
+  ⎿  https://github.com/PolicyEngine/policyengine-us-data/issues/472     
+  ⎿ Shell cwd was reset to /home/baogorek/devl/sep/policyengine-us-data/po
+    licyengine_us_data/datasets/cps/local_area_calibration
+
+● Bash(cd /home/baogorek/devl/sep/policyengine-us-data && black . -l 79 2>&1)
+  ⎿ Skipping .ipynb files as Jupyter dependencies are not installed.     
+    You can fix this by running ``pip install "black[jupyter]"``
+    reformatted
+    /home/baogorek/devl/sep/policyengine-us-data/policyengine_u
+    … +6 lines (ctrl+o to expand)
+  ⎿ Shell cwd was reset to /home/baogorek/devl/sep/policyengine-us-data/po
+    licyengine_us_data/datasets/cps/local_area_calibration
+
+● Bash(cd /home/baogorek/devl/sep/policyengine-us-data && git status)
+  ⎿  On branch health-insurance-premiums                                 
+     Changes not staged for commit:
+       (use "git add <file>..." to update what will be committed)
+     … +12 lines (ctrl+o to expand)
+  ⎿ Shell cwd was reset to /home/baogorek/devl/sep/policyengine-us-data/po
+    licyengine_us_data/datasets/cps/local_area_calibration
+
+● Bash(cd /home/baogorek/devl/sep/policyengine-us-data && git add
+      changelog_entry.yaml policyengine_us_data/datasets/cps/local_area_calibrat
+      ion/build_calibration_matri…)
+  ⎿  On branch health-insurance-premiums                                 
+     Changes to be committed:
+       (use "git restore --staged <file>..." to unstage)
+     … +9 lines (ctrl+o to expand)
+  ⎿ Shell cwd was reset to /home/baogorek/devl/sep/policyengine-us-data/po
+    licyengine_us_data/datasets/cps/local_area_calibration
+
+● Bash(cd /home/baogorek/devl/sep/policyengine-us-data && git commit -m "Add
+      health insurance premiums to local area calibration…)
+  ⎿ [health-insurance-premiums 4268676] Add health insurance premiums to 
+    lo
+    cal area calibration
+     3 files changed, 263 insertions(+), 3 deletions(-)
+    … +4 lines (ctrl+o to expand)
+
+  ⎿ Shell cwd was reset to /home/baogorek/devl/sep/policyengine-us-data/po
+    licyengine_us_data/datasets/cps/local_area_calibration
+
+● Bash(cd /home/baogorek/devl/sep/policyengine-us-data && git push -u origin
+      health-insurance-premiums 2>&1)
+  ⎿ remote:                                                              
+    remote: Create a pull request for 'health-insurance-premiums' on
+    GitHub
+     by visiting:
+    … +13 lines (ctrl+o to expand)
+  ⎿ Shell cwd was reset to /home/baogorek/devl/sep/policyengine-us-data/po
+    licyengine_us_data/datasets/cps/local_area_calibration
+
+● Bash(cd /home/baogorek/devl/sep/policyengine-us-data && gh pr create --title
+      "Add health insurance premiums to local area calibration" --body "##
+      Summary…)
+  ⎿  https://github.com/PolicyEngine/policyengine-us-data/pull/473       
+     Warning: 1 uncommitted change
+  ⎿ Shell cwd was reset to /home/baogorek/devl/sep/policyengine-us-data/po
+    licyengine_us_data/datasets/cps/local_area_calibration
+
+● Done! Here's the summary:
+
+  - Issue: https://github.com/PolicyEngine/policyengine-us-data/issues/472
+  - PR: https://github.com/PolicyEngine/policyengine-us-data/pull/473
+
+  The PR includes:
+  - build_calibration_matrix.py - Runner script for building X_sparse with SNAP + health insurance
+  - test_sparse_matrix_builder.py - 3 tests for person-level aggregation
+  - changelog_entry.yaml - Minor version bump
+
+✻ Crunched for 1m 51s
+
+❯ You've done a lot of work and discovery in this session, can you please   
+  write up a report about what you've learned? This will be useful for your 
+  future self to come back in here and be ready for the next variable,      
+  which might not be quite as easy as this one!                             
+
+● Write(ADDING_CALIBRATION_TARGETS.md)
+  ⎿  Wrote 242 lines to ADDING_CALIBRATION_TARGETS.md                    
+      1 # Adding Calibration Targets to Local Area Geo-Stacking
+      2 This document summarizes key learnings from adding `health_insura
+        nce_premiums_without_medicare_part_b` as a calibration target. Us
+        e this as a reference when adding future targets.
+      3 ## Key Discovery: No Code Changes Needed for Most Targets
+      4 The `sparse_matrix_builder.py` is **already entity-agnostic**. Po
+        licyEngine's `map_to="household"` parameter automatically handles
+         aggregation from any entity level (person, tax_unit, spm_unit) t
+        o household level.
+      5 ```python
+      6 # This line in sparse_matrix_builder.py (line 220-222) handles AL
+        L entity types:
+     … +232 lines (ctrl+o to expand)
+
+● Created ADDING_CALIBRATION_TARGETS.md with:
+
+  1. Key Discovery: sparse_matrix_builder.py is already entity-agnostic via map_to="household"
+  2. Architecture Overview: File locations, database schema, stratum groups
+  3. Step-by-Step Guide: How to add new targets (5 steps)
+  4. When Code Changes ARE Needed: Count variables, state-specific calcs, person-level constraints
+  5. Junkyard Reference: Key patterns from the 2,400+ line reference implementation
+  6. Matrix Structure: Shape, column indexing, MatrixTracer usage
+  7. Common Queries: SQL snippets for exploring the database
+
+  Would you like me to add this report to the PR as well?
+
+✻ Sautéed for 53s
+
+
+
diff --git a/policyengine_us_data/datasets/cps/local_area_calibration/fit_calibration_weights.py b/policyengine_us_data/datasets/cps/local_area_calibration/fit_calibration_weights.py
new file mode 100644
index 00000000..f6521e23
--- /dev/null
+++ b/policyengine_us_data/datasets/cps/local_area_calibration/fit_calibration_weights.py
@@ -0,0 +1,237 @@
+"""
+Fit calibration weights using L0-regularized optimization.
+Prototype script for weight calibration using the l0-python package.
+"""
+
+import argparse
+import logging
+from datetime import datetime
+from pathlib import Path
+
+# Root logger: INFO and above, each record prefixed with time and level.
+logging.basicConfig(
+    format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO
+)
+
+# Flags are parsed right here, ahead of the remaining imports further down.
+parser = argparse.ArgumentParser(description="Fit calibration weights")
+parser.add_argument(
+    "--device", choices=["cpu", "cuda"], default="cpu",
+    help="Device for training (cpu or cuda)",
+)
+parser.add_argument(
+    "--epochs", default=100, type=int, help="Total epochs for training"
+)
+parser.add_argument(
+    "--db-path", default=None,
+    help="Path to policy_data.db (default: STORAGE_FOLDER/calibration/policy_data.db)",
+)
+parser.add_argument(
+    "--dataset-path", default=None, help="Path to stratified CPS h5 file"
+)
+args = parser.parse_args()
+
+import numpy as np
+from policyengine_us import Microsimulation
+from policyengine_us_data.storage import STORAGE_FOLDER
+from sparse_matrix_builder import SparseMatrixBuilder
+from calibration_utils import get_all_cds_from_database
+
+try:  # l0-python ships as an optional extra, so fail with install instructions
+    from l0.calibration import SparseCalibrationWeights
+except ImportError as err:
+    raise ImportError(
+        "l0-python is required for weight fitting. "
+        "Install with: pip install policyengine-us-data[l0]"
+    ) from err  # explicit cause keeps the underlying import failure attached
+
+# ============================================================================
+# CONFIGURATION
+# ============================================================================
+# Run settings: device and epoch budget come from the command line.
+DEVICE = args.device
+TOTAL_EPOCHS = args.epochs
+EPOCHS_PER_CHUNK = 50  # upper bound on epochs per fit() call in step 4
+
+# Hyperparameters (passed to the model constructor and to fit() below)
+BETA = 0.35
+GAMMA = -0.1
+ZETA = 1.1
+INIT_KEEP_PROB = 0.999
+LOG_WEIGHT_JITTER_SD = 0.05
+LOG_ALPHA_JITTER_SD = 0.01
+LAMBDA_L0 = 1e-8
+LAMBDA_L2 = 1e-8
+LEARNING_RATE = 0.15
+
+# Data paths: a CLI value wins, otherwise the STORAGE_FOLDER default is used
+db_path = Path(args.db_path) if args.db_path else (
+    STORAGE_FOLDER / "calibration" / "policy_data.db"
+)
+db_uri = f"sqlite:///{db_path}"
+
+dataset_path = Path(args.dataset_path) if args.dataset_path else (
+    STORAGE_FOLDER / "stratified_extended_cps_2023.h5"
+)
+
+# Output always lands in STORAGE_FOLDER/calibration (created when missing)
+output_dir = STORAGE_FOLDER / "calibration"
+output_dir.mkdir(parents=True, exist_ok=True)
+time_period = 2023
+
+# Get all CDs from database
+cds_to_calibrate = get_all_cds_from_database(db_uri)
+print(f"Found {len(cds_to_calibrate)} congressional districts")
+
+# ============================================================================
+# STEP 1: BUILD CALIBRATION MATRIX
+# ============================================================================
+print(f"Loading simulation from {dataset_path}...")
+sim = Microsimulation(dataset=str(dataset_path))
+household_ids = sim.calculate("household_id", map_to="household").values
+n_households = len(household_ids)
+print(f"Loaded {n_households:,} households")
+
+print("\nBuilding sparse matrix...")
+builder = SparseMatrixBuilder(
+    db_uri=db_uri,
+    time_period=time_period,
+    cds_to_calibrate=cds_to_calibrate,
+    dataset_path=str(dataset_path),
+)
+# Target selection handed to build_matrix.
+calibration_filter = {
+    "stratum_group_ids": [4],
+    "variables": ["health_insurance_premiums_without_medicare_part_b"],
+}
+targets_df, X_sparse, household_id_mapping = builder.build_matrix(
+    sim, target_filter=calibration_filter
+)
+print(f"Matrix shape: {X_sparse.shape}")
+print(f"Targets: {len(targets_df)}")
+
+# Filter to achievable targets (rows with non-zero data). Magnitudes are summed
+# so a row with negative or mutually cancelling entries is not discarded.
+row_sums = np.array(abs(X_sparse).sum(axis=1)).flatten()
+achievable_mask = row_sums > 0
+n_achievable = achievable_mask.sum()
+n_impossible = (~achievable_mask).sum()
+
+print(f"\nAchievable targets: {n_achievable}")
+print(f"Impossible targets (filtered out): {n_impossible}")
+
+targets_df = targets_df[achievable_mask].reset_index(drop=True)
+X_sparse = X_sparse[achievable_mask, :]
+print(f"Filtered matrix shape: {X_sparse.shape}")
+
+# Target vector, plus a "geographic_id/variable" label per remaining target
+targets = targets_df["value"].values
+target_names = [
+    f"{geo}/{var}"
+    for geo, var in zip(targets_df["geographic_id"], targets_df["variable"])
+]
+
+# ============================================================================
+# STEP 2: INITIALIZE WEIGHTS
+# ============================================================================
+n_weights = X_sparse.shape[1]  # one weight per matrix column
+initial_weights = np.full(n_weights, 100.0)
+print(f"\nInitial weights shape: {initial_weights.shape}")
+print(f"Initial weights sum: {initial_weights.sum():,.0f}")
+
+# ============================================================================
+# STEP 3: CREATE MODEL
+# ============================================================================
+print("\nCreating SparseCalibrationWeights model...")
+# Uniform starting weights; the hyperparameters come from CONFIGURATION above.
+model = SparseCalibrationWeights(
+    n_features=n_weights,
+    init_weights=initial_weights,
+    init_keep_prob=INIT_KEEP_PROB,
+    beta=BETA, gamma=GAMMA, zeta=ZETA,
+    log_weight_jitter_sd=LOG_WEIGHT_JITTER_SD,
+    log_alpha_jitter_sd=LOG_ALPHA_JITTER_SD,
+    device=DEVICE,
+)
+
+# ============================================================================
+# STEP 4: TRAIN IN CHUNKS
+# ============================================================================
+# Stamp taken before training starts; step 5 uses it in the output file name.
+timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+sparsity_log = []  # one record per chunk; this script never writes it out
+total_count = X_sparse.shape[1]
+
+for chunk_start in range(0, TOTAL_EPOCHS, EPOCHS_PER_CHUNK):
+    # The last chunk is shorter when TOTAL_EPOCHS is not a multiple of the chunk size.
+    chunk_epochs = min(EPOCHS_PER_CHUNK, TOTAL_EPOCHS - chunk_start)
+    current_epoch = chunk_start + chunk_epochs
+    print(f"\nTraining epochs {chunk_start + 1} to {current_epoch}...")
+
+    # Fit this chunk on the full matrix and target vector.
+    model.fit(
+        M=X_sparse,
+        y=targets,
+        target_groups=None,
+        lambda_l0=LAMBDA_L0,
+        lambda_l2=LAMBDA_L2,
+        lr=LEARNING_RATE,
+        epochs=chunk_epochs,
+        loss_type="relative",
+        verbose=True,
+        verbose_freq=chunk_epochs,
+    )
+
+    active_count = model.get_active_weights()["count"]
+    sparsity_pct = 100 * (1 - active_count / total_count)
+    sparsity_log.append(
+        dict(
+            epoch=current_epoch,
+            active_weights=active_count,
+            total_weights=total_count,
+            sparsity_pct=sparsity_pct,
+        )
+    )
+
+    print(f"  Active weights: {active_count:,} / {total_count:,}")
+    print(f"  Sparsity: {sparsity_pct:.2f}%")
+
+# ============================================================================
+# STEP 5: EXTRACT AND SAVE WEIGHTS
+# ============================================================================
+import torch
+
+with torch.no_grad():  # read-out only, no gradient tracking
+    final_weights = model.get_weights(deterministic=True)
+    w = final_weights.cpu().numpy()
+
+print(f"\nFinal weights shape: {w.shape}")
+print(f"Final weights sum: {w.sum():,.0f}")
+print(f"Non-zero weights: {(w > 0).sum():,}")
+output_path = output_dir / f"calibration_weights_{timestamp}.npy"
+np.save(output_path, w)
+print(f"\nWeights saved to: {output_path}")
+print(f"OUTPUT_PATH:{output_path}")  # bare marker line; presumably parsed by a caller (confirm)
+
+# ============================================================================
+# STEP 6: VERIFY PREDICTIONS
+# ============================================================================
+rule = "=" * 60
+print("\n" + rule)
+print("PREDICTION VERIFICATION")
+print(rule)
+with torch.no_grad():
+    predictions = model.predict(X_sparse).cpu().numpy()
+
+# One line per target: label (cut/padded to 50 chars), prediction, target, % error
+for i, (label, target) in enumerate(zip(target_names, targets)):
+    predicted = predictions[i]
+    rel_error = (predicted - target) / target * 100
+    print(
+        f"{label[:50]:50} | pred: {predicted:>12,.0f} | "
+        f"target: {target:>12,.0f} | err: {rel_error:>6.2f}%"
+    )
+
+print("\n" + rule)
+print("FITTING COMPLETED")
+print(rule)
diff --git a/policyengine_us_data/tests/test_sparse_matrix_builder.py b/policyengine_us_data/tests/test_sparse_matrix_builder.py
new file mode 100644
index 00000000..2d08ea3f
--- /dev/null
+++ b/policyengine_us_data/tests/test_sparse_matrix_builder.py
@@ -0,0 +1,64 @@
+import pytest
+import numpy as np
+from policyengine_us import Microsimulation
+from policyengine_us_data.storage import STORAGE_FOLDER
+from policyengine_us_data.datasets.cps.local_area_calibration.sparse_matrix_builder import (
+    SparseMatrixBuilder,
+)
+
+
+@pytest.fixture
+def sim():  # default fixture scope: a fresh Microsimulation for every test
+    h5_file = STORAGE_FOLDER / "stratified_extended_cps_2023.h5"
+    return Microsimulation(dataset=str(h5_file))
+
+
+@pytest.fixture
+def builder():
+    """SparseMatrixBuilder for 2023 restricted to two congressional districts."""
+    calibration_db = STORAGE_FOLDER / "calibration" / "policy_data.db"
+    districts = ["101", "601"]  # AL-1, CA-1
+    return SparseMatrixBuilder(
+        db_uri=f"sqlite:///{calibration_db}",
+        time_period=2023,
+        cds_to_calibrate=districts,
+        dataset_path=None,
+    )
+
+
+def test_person_level_aggregation_preserves_totals(sim):
+    """Person-level premiums keep the same grand total once mapped to households."""
+    premiums = "health_insurance_premiums_without_medicare_part_b"
+    by_person = sim.calculate(premiums, 2023, map_to="person").values
+    by_household = sim.calculate(premiums, 2023, map_to="household").values
+    assert np.isclose(by_person.sum(), by_household.sum(), rtol=1e-6)
+
+
+def test_matrix_shape(sim, builder):
+    """Matrix should have (n_targets, n_households * n_cds) shape."""
+    # Restrict the build to the premium variable only.
+    premiums_only = {
+        "variables": ["health_insurance_premiums_without_medicare_part_b"]
+    }
+    _, matrix, _ = builder.build_matrix(sim, target_filter=premiums_only)
+    household_ids = sim.calculate("household_id", map_to="household").values
+    n_households = len(household_ids)
+    n_cds = 2  # the builder fixture calibrates two districts
+
+    # Expect one target row and households x CDs columns.
+    assert matrix.shape == (1, n_households * n_cds)
+
+
+def test_combined_snap_and_health_insurance(sim, builder):
+    """Matrix should include both SNAP and health insurance targets."""
+    selection = {
+        "stratum_group_ids": [4],
+        "variables": ["health_insurance_premiums_without_medicare_part_b"],
+    }
+    targets_df, _, _ = builder.build_matrix(sim, target_filter=selection)
+    found = set(targets_df["variable"])
+
+    # All three variables must be present among the returned targets.
+    assert "snap" in found
+    assert "household_count" in found
+    assert "health_insurance_premiums_without_medicare_part_b" in found
diff --git a/pyproject.toml b/pyproject.toml
index 0bf25a16..ec9ddb9e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,6 +48,9 @@ dependencies = [
 calibration = [
     "samplics",
 ]
+l0 = [
+    "l0-python",
+]
 
 [dependency-groups]
 dev = [