Add mixed adoption trajectory utilities and scenario script

sherryzuo · sherryzuo · commit 3535dd601b94 · 2025-12-09T19:40:36.000-05:00
diff --git a/rate_design/ny/hp_rates/scripts/generate_ny_hp_scenarios.py b/rate_design/ny/hp_rates/scripts/generate_ny_hp_scenarios.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+"""Generate NY heat pump adoption scenarios with cumulative adoption."""
+
+from pathlib import Path
+
+from utils.mixed_adoption_trajectory import (
+    build_adoption_trajectory,
+    fetch_baseline_sample,
+)
+
+# Base data directory for NY HP rates (git-ignored raw/processed, configs versioned); relative, so resolved against the CWD
+BASE_DATA_DIR = Path("rate_design/ny/hp_rates/data")
+
+# Run configuration: echoed key by key at startup, then read by main()
+CONFIG = {
+    # ResStock release parameters (forwarded unchanged to both utility calls)
+    "release_year": "2024",
+    "weather_file": "tmy3",
+    "release_version": "2",
+    "state": "NY",
+    # Heat pump upgrade ID (adjust based on your ResStock release)
+    "hp_upgrade_id": "1",
+    # Download settings (passed as output_dir / max_workers to both utility calls)
+    "output_dir": BASE_DATA_DIR / "buildstock_raw",
+    "max_workers": 5,
+    # Sampling settings
+    "sample_size": 1000,  # Number of buildings to sample
+    "sample_seed": 123,  # Seed for sampling reproducibility (determines building ordering)
+    # Adoption scenario settings: fractions of the sampled buildings that adopt the upgrade
+    "adoption_fractions": [0.1, 0.2, 0.3, 0.5, 0.8, 1.0],
+    # Output settings (passed as output_processed_dir to build_adoption_trajectory)
+    "processed_dir": BASE_DATA_DIR / "buildstock_processed",
+}
+
+
+def _print_banner(title, leading_newline=True):
+    """Print ``title`` framed by two 80-character ``=`` rules, optionally after a blank line."""
+    rule = "=" * 80
+    print(("\n" if leading_newline else "") + rule)
+    print(title)
+    print(rule)
+
+
+def main():
+    """Run the complete workflow to generate adoption scenarios.
+
+    Reads every setting from the module-level ``CONFIG``: fetches the baseline sample,
+    passes it to ``build_adoption_trajectory``, then prints one summary line per
+    returned scenario.
+    """
+    _print_banner("NY Heat Pump Cumulative Adoption Scenario Generator", leading_newline=False)
+    print("\nConfiguration:")
+    for key, value in CONFIG.items():
+        print(f"  {key}: {value}")
+    print("\n")
+
+    # Keyword arguments that both utility calls receive with identical values
+    release_keys = ("release_year", "weather_file", "release_version", "state")
+    download_keys = ("output_dir", "max_workers")
+    shared_kwargs = {key: CONFIG[key] for key in release_keys + download_keys}
+
+    # Step 1: Fetch baseline sample and establish building ID ordering
+    _print_banner("STEP 1: Fetching baseline sample")
+    print(f"Fetching {CONFIG['sample_size']} baseline buildings (seed={CONFIG['sample_seed']})")
+
+    baseline_metadata_path, building_ids = fetch_baseline_sample(
+        sample_size=CONFIG["sample_size"],
+        random_seed=CONFIG["sample_seed"],
+        **shared_kwargs,
+    )
+
+    print(f"\n✓ Fetched {len(building_ids)} baseline buildings")
+    print(f"✓ Baseline metadata: {baseline_metadata_path}")
+    print("✓ Building ID ordering established (deterministic from seed)")
+
+    # Step 2: Build adoption trajectory
+    _print_banner("STEP 2: Building adoption trajectory")
+    print(f"Creating scenarios for adoption fractions: {CONFIG['adoption_fractions']}")
+    print("Note: Upgrade data will be fetched incrementally for each fraction")
+
+    scenario_paths = build_adoption_trajectory(
+        baseline_metadata_path=baseline_metadata_path,
+        baseline_building_ids=building_ids,
+        adoption_fractions=CONFIG["adoption_fractions"],
+        upgrade_id=CONFIG["hp_upgrade_id"],
+        output_processed_dir=CONFIG["processed_dir"],
+        **shared_kwargs,
+    )
+
+    # Summary
+    _print_banner("COMPLETE - Scenario Summary")
+    print(f"\nGenerated {len(scenario_paths)} adoption scenarios:")
+    for fraction, path in sorted(scenario_paths.items()):
+        # Display-only count recomputed here; presumably matches the utility's rounding — TODO confirm
+        n_adopters = int(round(fraction * len(building_ids)))
+        print(f"  {fraction*100:3.0f}% adoption ({n_adopters:4d} buildings) → {path.name}")
+
+    print(f"\nAll scenarios use seed {CONFIG['sample_seed']} ensuring:")
+    print("  - Reproducibility: Re-running with same seed gives identical results")
+    print("  - Cumulative property: Adopters at X% ⊆ Adopters at Y% for X < Y")
+    print("  - Efficiency: Upgrade data fetched only for buildings that adopt")
+
+    print("\nNext steps:")
+    print("  - Load scenarios with: pl.read_parquet(path)")
+    print("  - Check 'adopted' column (0=baseline, 1=upgrade)")
+    print("  - Use for GenX/CAIRO modeling")
+    print("\n✓ Done!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_cumulative_adoption.py b/tests/test_cumulative_adoption.py
@@ -0,0 +1,193 @@
+"""Tests for cumulative adoption sampling utilities."""
+
+import numpy as np
+import pytest
+
+from rate_design.utils.resstock_cumulative_adoption import (
+    generate_random_ordering,
+    select_cumulative_adopters,
+)
+
+
+class TestRandomOrdering:
+    """Behavioral checks for ``generate_random_ordering``."""
+
+    def test_reproducibility_with_seed(self):
+        """Two calls sharing a seed must return equal orderings."""
+        ids = list(range(100))
+        fixed_seed = 42
+
+        first = generate_random_ordering(ids, seed=fixed_seed)
+        second = generate_random_ordering(ids, seed=fixed_seed)
+
+        assert first == second
+
+    def test_different_seeds_produce_different_orderings(self):
+        """Seeds 42 and 43 must not yield the same ordering."""
+        ids = list(range(100))
+
+        with_seed_42 = generate_random_ordering(ids, seed=42)
+        with_seed_43 = generate_random_ordering(ids, seed=43)
+
+        assert with_seed_42 != with_seed_43
+
+    def test_all_ids_present(self):
+        """The shuffled output keeps every ID and the original length."""
+        ids = list(range(50))
+        shuffled = generate_random_ordering(ids, seed=42)
+
+        assert set(shuffled) == set(ids)
+        assert len(shuffled) == len(ids)
+
+    def test_ordering_is_permutation(self):
+        """Sorting the output must give back the sorted input."""
+        ids = [10, 20, 30, 40, 50]
+        shuffled = generate_random_ordering(ids, seed=42)
+
+        assert sorted(shuffled) == sorted(ids)
+
+    def test_none_seed_is_non_deterministic(self):
+        """With ``seed=None`` repeated calls should differ (with high probability)."""
+        ids = list(range(100))
+
+        # Five unseeded shuffles, collapsed into the set of distinct results
+        shuffles = (generate_random_ordering(ids, seed=None) for _ in range(5))
+        distinct = {tuple(shuffle) for shuffle in shuffles}
+
+        # Five identical random shuffles of 100 items would be extremely unlikely
+        assert len(distinct) > 1
+
+
+class TestCumulativeAdopters:
+    """Behavioral checks for ``select_cumulative_adopters``.
+
+    Every test passes an explicit ordering, so the expected adopters are known exactly.
+    """
+
+    def test_zero_fraction(self):
+        """A fraction of 0.0 must yield an empty ``set``."""
+        building_order = list(range(100))
+        chosen = select_cumulative_adopters(building_order, 0.0)
+
+        assert len(chosen) == 0
+        assert isinstance(chosen, set)
+
+    def test_full_adoption(self):
+        """A fraction of 1.0 must select every building in the ordering."""
+        building_order = list(range(100))
+        chosen = select_cumulative_adopters(building_order, 1.0)
+
+        assert len(chosen) == 100
+        assert chosen == set(building_order)
+
+    def test_fraction_rounding(self):
+        """Fractions of a 100-building ordering map to the expected adopter counts.
+
+        NOTE(review): both products are whole numbers, so no rounding tie is exercised here.
+        """
+        building_order = list(range(100))
+
+        # 0.1 * 100 -> 10 adopters
+        assert len(select_cumulative_adopters(building_order, 0.1)) == 10
+
+        # 0.25 * 100 -> 25 adopters
+        assert len(select_cumulative_adopters(building_order, 0.25)) == 25
+
+    def test_cumulative_property(self):
+        """Adopter sets must nest as the fraction grows, and have exact sizes."""
+        building_order = list(range(1000))
+        expected_sizes = {0.1: 100, 0.2: 200, 0.5: 500, 0.8: 800}
+
+        chosen = [select_cumulative_adopters(building_order, f) for f in expected_sizes]
+
+        # Each set must be contained in the set for the next larger fraction
+        for smaller, larger in zip(chosen, chosen[1:]):
+            assert smaller.issubset(larger)
+
+        # Set sizes must equal fraction * 1000
+        assert [len(group) for group in chosen] == list(expected_sizes.values())
+
+    def test_selects_from_beginning_of_ordering(self):
+        """Adopters are taken from the front of the ordering."""
+        building_order = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
+
+        # 0.3 of 10 buildings -> 3 adopters
+        chosen = select_cumulative_adopters(building_order, 0.3)
+
+        # Exactly the first three entries of the ordering
+        assert chosen == {10, 20, 30}
+
+    def test_invalid_fraction_raises_error(self):
+        """Fractions below 0 or above 1 must raise ``ValueError``."""
+        building_order = list(range(100))
+
+        # One value below 0 and one above 1
+        for bad_fraction in (-0.1, 1.5):
+            with pytest.raises(ValueError):
+                select_cumulative_adopters(building_order, bad_fraction)
+
+    def test_edge_case_single_building(self):
+        """A one-element ordering yields 0, 0 and 1 adopters at 0%, 50% and 100%.
+
+        NOTE(review): the 50% case pins 0.5 * 1 -> 0; presumably the implementation
+        rounds half to even (Python's ``round``) or truncates — TODO confirm.
+        """
+        building_order = [42]
+        fractions = (0.0, 0.5, 1.0)
+
+        sizes = [len(select_cumulative_adopters(building_order, f)) for f in fractions]
+
+        # 0.5 * 1 = 0.5 is expected to resolve to 0 adopters, not 1
+        assert sizes == [0, 0, 1]
+
+
+class TestIntegration:
+    """Integration tests combining ordering and selection."""
+
+    def test_end_to_end_cumulative_workflow(self):
+        """Shuffle a 1000-building cohort, then check nesting and sizes across fractions."""
+        # Simulate a cohort
+        bldg_ids = list(range(1000, 2000))  # 1000 buildings
+        seed = 12345
+
+        # Generate ordering
+        ordering = generate_random_ordering(bldg_ids, seed=seed)
+
+        # Generate multiple adoption fractions (kept ascending: the subset check pairs neighbours)
+        fractions = [0.0, 0.1, 0.2, 0.3, 0.5, 0.8, 1.0]
+        adoption_sets = {f: select_cumulative_adopters(ordering, f) for f in fractions}
+
+        # Verify cumulative property: each fraction's adopters are contained in the next one's
+        for lower, higher in zip(fractions, fractions[1:]):
+            assert adoption_sets[lower].issubset(adoption_sets[higher]), (
+                f"adopters at {lower} are not a subset of adopters at {higher}"
+            )
+
+        # Verify sizes
+        assert len(adoption_sets[0.0]) == 0
+        assert len(adoption_sets[0.1]) == 100
+        assert len(adoption_sets[0.5]) == 500
+        assert len(adoption_sets[1.0]) == 1000
+
+    def test_reproducibility_across_sessions(self):
+        """Test that workflow is reproducible across different runs.
+
+        Both "sessions" run in the same process; only the seed is shared between them.
+        """
+        bldg_ids = list(range(500))
+        seed = 999
+
+        # Session 1
+        ordering1 = generate_random_ordering(bldg_ids, seed=seed)
+        adopters1_20 = select_cumulative_adopters(ordering1, 0.2)
+        adopters1_50 = select_cumulative_adopters(ordering1, 0.5)
+
+        # Session 2 (simulating re-run)
+        ordering2 = generate_random_ordering(bldg_ids, seed=seed)
+        adopters2_20 = select_cumulative_adopters(ordering2, 0.2)
+        adopters2_50 = select_cumulative_adopters(ordering2, 0.5)
+
+        # Should be identical
+        assert ordering1 == ordering2
+        assert adopters1_20 == adopters2_20
+        assert adopters1_50 == adopters2_50
diff --git a/utils/mixed_adoption_trajectory.py b/utils/mixed_adoption_trajectory.py