Commit 12df117

refactor for clarity and better naming / org
Parent: 26e08cb

13 files changed (+539, -777 lines)

.gitignore

Lines changed: 3 additions & 0 deletions
```diff
@@ -13,6 +13,9 @@ rate_design/ny/hp_rates/data/cairo_cases/
 outputs/
 site/
 
+# Test outputs (generated during tests)
+tests/test_outputs/
+
 # Python packaging/build artifacts
 *.pyc
 *.pyo
```
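The new ignore rule implies the suite writes real files to disk during test runs. A minimal sketch of the kind of test that would populate this directory, assuming pytest discovery and the polars library; the file name, columns, and test are hypothetical, not taken from this commit:

```python
# tests/test_scenario_outputs.py (hypothetical)
from pathlib import Path

import polars as pl

# The directory covered by the new .gitignore rule
TEST_OUTPUT_DIR = Path(__file__).parent / "test_outputs"


def test_scenario_parquet_roundtrip():
    TEST_OUTPUT_DIR.mkdir(exist_ok=True)
    out_path = TEST_OUTPUT_DIR / "scenario_smoke.parquet"

    # Write a tiny scenario-shaped frame, then read it back unchanged
    df = pl.DataFrame({"bldg_id": [1, 2, 3], "adopted": [0, 1, 1]})
    df.write_parquet(out_path)

    assert pl.read_parquet(out_path).equals(df)
```

Ignoring the directory rather than cleaning it up in a fixture keeps failed-run artifacts around for debugging without polluting git status.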

rate_design/ny/hp_rates/scripts/generate_ny_hp_scenarios.py

Lines changed: 33 additions & 90 deletions
```diff
@@ -3,11 +3,9 @@
 
 from pathlib import Path
 
-from utils.buildstock_io import get_load_curve_dir, get_metadata_path
-from utils.resstock_cumulative_adoption import (
-    build_cohort_table,
-    build_multiple_adoption_scenarios,
-    fetch_baseline_and_upgrade_data,
+from utils.mixed_adoption_trajectory import (
+    build_adoption_trajectory,
+    fetch_baseline_sample,
 )
 
 # Base data directory for NY HP rates (git-ignored raw/processed, configs versioned)
@@ -25,15 +23,13 @@
     # Download settings
     "output_dir": BASE_DATA_DIR / "buildstock_raw",
     "max_workers": 5,
-    # Sampling settings (set to None to use all buildings)
-    "sample_size": 1000,  # Number of buildings to sample (None = all buildings)
-    "sample_seed": 123,  # Seed for sampling reproducibility
+    # Sampling settings
+    "sample_size": 1000,  # Number of buildings to sample
+    "sample_seed": 123,  # Seed for sampling reproducibility (determines building ordering)
     # Adoption scenario settings
     "adoption_fractions": [0.1, 0.2, 0.3, 0.5, 0.8, 1.0],
-    "random_seed": 42,  # For reproducibility
     # Output settings
     "processed_dir": BASE_DATA_DIR / "buildstock_processed",
-    "cohort_table_path": BASE_DATA_DIR / "buildstock_processed" / "cohort_table.parquet",
 }
 
 
@@ -47,119 +43,66 @@ def main():
         print(f"  {key}: {value}")
     print("\n")
 
-    # Step 1: Fetch baseline and upgrade data
+    # Step 1: Fetch baseline sample and establish building ID ordering
     print("\n" + "=" * 80)
-    print("STEP 1: Fetching baseline and heat pump upgrade data")
+    print("STEP 1: Fetching baseline sample")
     print("=" * 80)
+    print(f"Fetching {CONFIG['sample_size']} baseline buildings (seed={CONFIG['sample_seed']})")
 
-    baseline_paths, hp_paths, failed = fetch_baseline_and_upgrade_data(
+    baseline_metadata_path, building_ids = fetch_baseline_sample(
+        sample_size=CONFIG["sample_size"],
+        random_seed=CONFIG["sample_seed"],
         release_year=CONFIG["release_year"],
         weather_file=CONFIG["weather_file"],
         release_version=CONFIG["release_version"],
         state=CONFIG["state"],
-        hp_upgrade_id=CONFIG["hp_upgrade_id"],
         output_dir=CONFIG["output_dir"],
         max_workers=CONFIG["max_workers"],
-        sample_size=CONFIG["sample_size"],
-        random_seed=CONFIG["sample_seed"],
     )
 
-    if failed:
-        print(f"\n⚠️ Warning: {len(failed)} files failed to download:")
-        for f in failed[:5]:  # Show first 5
-            print(f"  - {f}")
-        if len(failed) > 5:
-            print(f"  ... and {len(failed) - 5} more")
-
-    print(f"\n✓ Downloaded {len(baseline_paths)} baseline files")
-    print(f"✓ Downloaded {len(hp_paths)} HP upgrade files")
+    print(f"\n✓ Fetched {len(building_ids)} baseline buildings")
+    print(f"✓ Baseline metadata: {baseline_metadata_path}")
+    print(f"✓ Building ID ordering established (deterministic from seed)")
 
-    # Step 2: Build cohort table
+    # Step 2: Build adoption trajectory
     print("\n" + "=" * 80)
-    print("STEP 2: Building cohort table")
+    print("STEP 2: Building adoption trajectory")
     print("=" * 80)
+    print(f"Creating scenarios for adoption fractions: {CONFIG['adoption_fractions']}")
+    print("Note: Upgrade data will be fetched incrementally for each fraction")
 
-    # Get metadata paths
-    baseline_metadata = get_metadata_path(
-        output_dir=CONFIG["output_dir"],
-        release_year=CONFIG["release_year"],
-        weather_file=CONFIG["weather_file"],
-        release_version=CONFIG["release_version"],
-        upgrade_id="0",
-    )
-
-    hp_metadata = get_metadata_path(
-        output_dir=CONFIG["output_dir"],
-        release_year=CONFIG["release_year"],
-        weather_file=CONFIG["weather_file"],
-        release_version=CONFIG["release_version"],
+    scenario_paths = build_adoption_trajectory(
+        baseline_metadata_path=baseline_metadata_path,
+        baseline_building_ids=building_ids,
+        adoption_fractions=CONFIG["adoption_fractions"],
         upgrade_id=CONFIG["hp_upgrade_id"],
-    )
-
-    # Get load curve directories
-    baseline_load_dir = get_load_curve_dir(
-        output_dir=CONFIG["output_dir"],
         release_year=CONFIG["release_year"],
         weather_file=CONFIG["weather_file"],
         release_version=CONFIG["release_version"],
+        state=CONFIG["state"],
+        output_dir=CONFIG["output_dir"],
+        max_workers=CONFIG["max_workers"],
+        output_processed_dir=CONFIG["processed_dir"],
    )
 
-    hp_load_dir = baseline_load_dir  # Same directory, different upgrade subdirs
-
-    print(f"Baseline metadata: {baseline_metadata}")
-    print(f"HP metadata: {hp_metadata}")
-    print(f"Load curve directory: {baseline_load_dir}")
-
-    cohort_df = build_cohort_table(
-        baseline_metadata_path=baseline_metadata,
-        hp_metadata_path=hp_metadata,
-        baseline_load_dir=baseline_load_dir,
-        hp_load_dir=hp_load_dir,
-        output_path=CONFIG["cohort_table_path"],
-    )
-
-    print(f"\n✓ Built cohort table with {len(cohort_df)} buildings")
-    print(f"✓ Saved to {CONFIG['cohort_table_path']}")
-
-    # Show sample of cohort table
-    print("\nCohort table sample:")
-    print(cohort_df.head(3))
-
-    # Show heat pump type distribution if available
-    if "hp_type" in cohort_df.columns:
-        print("\nHeat pump type distribution:")
-        hp_type_counts = cohort_df.group_by("hp_type").count().sort("count", descending=True)
-        for row in hp_type_counts.iter_rows(named=True):
-            print(f"  {row['hp_type']}: {row['count']} buildings")
-
-    # Step 3: Build multiple adoption scenarios
-    print("\n" + "=" * 80)
-    print("STEP 3: Building adoption scenarios")
-    print("=" * 80)
-
-    scenario_paths = build_multiple_adoption_scenarios(
-        cohort_df=cohort_df,
-        adoption_fractions=CONFIG["adoption_fractions"],
-        seed=CONFIG["random_seed"],
-        output_dir=CONFIG["processed_dir"],
-        include_metadata=True,
-    )
-
+    # Summary
     print("\n" + "=" * 80)
     print("COMPLETE - Scenario Summary")
     print("=" * 80)
     print(f"\nGenerated {len(scenario_paths)} adoption scenarios:")
     for fraction, path in sorted(scenario_paths.items()):
-        print(f"  {fraction*100:3.0f}% adoption → {path}")
+        n_adopters = int(round(fraction * len(building_ids)))
+        print(f"  {fraction*100:3.0f}% adoption ({n_adopters:4d} buildings) → {path.name}")
 
-    print(f"\nAll scenarios use the same random seed ({CONFIG['random_seed']}) ensuring:")
+    print(f"\nAll scenarios use seed {CONFIG['sample_seed']} ensuring:")
     print("  - Reproducibility: Re-running with same seed gives identical results")
     print("  - Cumulative property: Adopters at X% ⊆ Adopters at Y% for X < Y")
+    print("  - Efficiency: Upgrade data fetched only for buildings that adopt")
 
     print("\nNext steps:")
     print("  - Load scenarios with: pl.read_parquet(path)")
+    print("  - Check 'adopted' column (0=baseline, 1=upgrade)")
     print("  - Use for GenX/CAIRO modeling")
-    print("  - Analyze adoption trajectories")
     print("\n✓ Done!")
 
 
```
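The summary lines above state the two properties the refactor hinges on: the building ID ordering is deterministic in `sample_seed`, and adopter sets are cumulative across fractions. The mechanism that gives both at once is taking prefixes of one seeded shuffle. A standalone sketch of that idea (not the `utils.mixed_adoption_trajectory` implementation, whose internals are not shown in this diff):

```python
import random

# Stand-ins for the 1000 sampled buildings and CONFIG["sample_seed"]
building_ids = list(range(1, 1001))
ordering = building_ids[:]
random.Random(123).shuffle(ordering)  # one seed -> one fixed ordering


def adopters(fraction: float) -> set[int]:
    """The first `fraction` of the fixed ordering adopts the upgrade."""
    n = int(round(fraction * len(ordering)))
    return set(ordering[:n])


# Prefixes of a single ordering nest, so adopters at X% ⊆ adopters at Y%
# for X < Y, and moving to a higher fraction only requires fetching upgrade
# data for the newly added buildings.
assert adopters(0.1) <= adopters(0.2) <= adopters(0.5) <= adopters(1.0)
```

Consuming a generated scenario then follows the script's own next steps: `pl.read_parquet(path)` plus a check of the `adopted` column (0 = baseline load, 1 = heat pump upgrade load).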
tests/test_cohort_builder.py

Lines changed: 0 additions & 204 deletions
This file was deleted.
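The deleted tests covered the retired cohort-builder path. A hypothetical sketch of a replacement property test for the new outputs, asserting the cumulative invariant the script prints; the `bldg_id` column name and the `scenario_paths` fixture (mapping fraction to parquet path) are assumptions, while `adopted` comes from the diff above:

```python
import polars as pl


def adopter_set(path) -> set:
    """Building IDs flagged as adopters in one scenario parquet."""
    df = pl.read_parquet(path)
    return set(df.filter(pl.col("adopted") == 1).get_column("bldg_id"))


def test_adopters_are_cumulative(scenario_paths):  # assumed fixture
    fractions = sorted(scenario_paths)
    for low, high in zip(fractions, fractions[1:]):
        assert adopter_set(scenario_paths[low]) <= adopter_set(scenario_paths[high])
```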
