diff --git a/.gitignore b/.gitignore
index f080fdd..543e4f4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,9 +7,15 @@ __pycache__/
 
 # Local data and outputs
 data/
+rate_design/ny/hp_rates/data/buildstock_raw/
+rate_design/ny/hp_rates/data/buildstock_processed/
+rate_design/ny/hp_rates/data/cairo_cases/
 outputs/
 site/
 
+# Test outputs (generated during tests)
+tests/test_outputs/
+
 # Python packaging/build artifacts
 *.pyc
 *.pyo
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..1d59f79
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1 @@
+Placeholder license. Replace with the appropriate license text for this project.
diff --git a/README.md b/README.md
index 291272f..59d8960 100644
--- a/README.md
+++ b/README.md
@@ -4,10 +4,15 @@ This repository is a clean scaffold for rate design analysis, focused on New Yor
 
 ## Layout
 
-- `src/rate_design/` — package skeleton for shared logic, utilities, and New York–specific code.
-- `data/ny/` — local cache for BuildStock and CAIRO inputs/outputs (kept out of git).
-- `scripts/` — helper scripts (e.g., running a NY heat pump rate scenario).
-- `tests/` — placeholder test files to fill in alongside new code.
+- `rate_design/` — package root.
+  - `ny/hp_rates/`
+    - `data/` — local inputs/outputs; `buildstock_*` and `cairo_cases/` are git-ignored. Configs under `tariff_structure/` and `tariff_mapping/` stay versioned.
+    - `scenarios/` — YAML configs selecting tariffs/mappings and other simulation parameters.
+    - `scripts/` — helpers such as customer selection, tariff builders, and case path helpers.
+    - `Justfile` — NY HP-specific recipes (stub).
+  - `ny/ev_rates/` — stubbed EV structure (data, scenarios, scripts, Justfile).
+- `utils/` — cross-jurisdiction utilities (buildstock IO, S3 sync, conversions).
+- `tests/` — placeholder test files to expand alongside code.
 
 ## Notes
 
-- Data under `data/` should remain local or synced via S3 tooling you add; keep large artifacts out of git.
+- Data under `rate_design/ny/hp_rates/data/` (buildstock raw/processed, cairo cases) should remain local or synced via S3 tooling you add; keep large artifacts out of git.
diff --git a/pyproject.toml b/pyproject.toml
index 2125d73..fd5d209 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ description = "Skeleton for the rate design platform."
 readme = "README.md"
 requires-python = ">=3.11"
 authors = [{ name = "Switchbox Data" }]
-license = { file = "LICENSE" }
+license = "LicenseRef-Proprietary"
 dependencies = [
     "buildstock-fetch",
     "cairo",
@@ -22,11 +22,23 @@ requires = ["setuptools>=61.0"]
 build-backend = "setuptools.build_meta"
 
 [tool.setuptools]
-package-dir = { "" = "src" }
+package-dir = { "" = "." }
 
 [tool.setuptools.packages.find]
-where = ["src"]
-include = ["rate_design*"]
+where = ["."]
+include = ["rate_design*", "utils*"]
+
+[tool.setuptools.package-data]
+"rate_design.ny.hp_rates" = [
+    "scenarios/*.yaml",
+    "data/tariff_structure/*.json",
+    "data/tariff_mapping/*.csv",
+]
 
 [tool.uv.sources]
cairo = { git = "https://github.com/NREL/CAIRO.git", rev = "tb/dev_package" }
+
+[tool.pytest.ini_options]
+markers = [
+    "integration: marks tests as integration tests that require API access (deselect with '-m \"not integration\"')",
+]
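Editor's note: the new `[tool.pytest.ini_options]` block above registers the `integration` marker used by the test suite added later in this patch. A minimal sketch of deselecting those tests without API access, using pytest's documented `pytest.main` entry point (the `tests/test_utils` path matches the layout introduced below):

```python
# Run only the non-integration tests programmatically; equivalent to
# `pytest -m "not integration" tests/test_utils` on the command line.
import pytest

# pytest.main accepts the same arguments as the CLI and returns an exit code.
exit_code = pytest.main(["-m", "not integration", "tests/test_utils"])
raise SystemExit(exit_code)
```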
diff --git a/src/rate_design/__init__.py b/rate_design/__init__.py
similarity index 100%
rename from src/rate_design/__init__.py
rename to rate_design/__init__.py
diff --git a/src/rate_design/ny/__init__.py b/rate_design/ny/__init__.py
similarity index 100%
rename from src/rate_design/ny/__init__.py
rename to rate_design/ny/__init__.py
diff --git a/rate_design/ny/hp_rates/Justfile b/rate_design/ny/hp_rates/Justfile
new file mode 100644
index 0000000..fcc7a7c
--- /dev/null
+++ b/rate_design/ny/hp_rates/Justfile
@@ -0,0 +1 @@
+# NY heat pump tasks (placeholder).
diff --git a/src/rate_design/ny/hp_rates/__init__.py b/rate_design/ny/hp_rates/__init__.py
similarity index 100%
rename from src/rate_design/ny/hp_rates/__init__.py
rename to rate_design/ny/hp_rates/__init__.py
diff --git a/src/rate_design/ny/hp_rates/case_generation.py b/rate_design/ny/hp_rates/case_generation.py
similarity index 100%
rename from src/rate_design/ny/hp_rates/case_generation.py
rename to rate_design/ny/hp_rates/case_generation.py
diff --git a/src/rate_design/ny/hp_rates/postprocess.py b/rate_design/ny/hp_rates/postprocess.py
similarity index 100%
rename from src/rate_design/ny/hp_rates/postprocess.py
rename to rate_design/ny/hp_rates/postprocess.py
diff --git a/src/rate_design/ny/hp_rates/run_scenario.py b/rate_design/ny/hp_rates/run_scenario.py
similarity index 100%
rename from src/rate_design/ny/hp_rates/run_scenario.py
rename to rate_design/ny/hp_rates/run_scenario.py
diff --git a/src/rate_design/ny/hp_rates/configs/ny_hp_baseline.yaml b/rate_design/ny/hp_rates/scenarios/ny_hp_baseline.yaml
similarity index 100%
rename from src/rate_design/ny/hp_rates/configs/ny_hp_baseline.yaml
rename to rate_design/ny/hp_rates/scenarios/ny_hp_baseline.yaml
diff --git a/src/rate_design/ny/hp_rates/configs/ny_hp_seasonal_rate.yaml b/rate_design/ny/hp_rates/scenarios/ny_hp_seasonal_rate.yaml
similarity index 100%
rename from src/rate_design/ny/hp_rates/configs/ny_hp_seasonal_rate.yaml
rename to rate_design/ny/hp_rates/scenarios/ny_hp_seasonal_rate.yaml
diff --git a/rate_design/ny/hp_rates/scripts/__init__.py b/rate_design/ny/hp_rates/scripts/__init__.py
new file mode 100644
index 0000000..3687678
--- /dev/null
+++ b/rate_design/ny/hp_rates/scripts/__init__.py
@@ -0,0 +1 @@
+"""NY heat pump helper scripts."""
diff --git a/rate_design/ny/hp_rates/scripts/generate_ny_hp_scenarios.py b/rate_design/ny/hp_rates/scripts/generate_ny_hp_scenarios.py
new file mode 100644
index 0000000..a044483
--- /dev/null
+++ b/rate_design/ny/hp_rates/scripts/generate_ny_hp_scenarios.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+"""Generate NY heat pump adoption scenarios with cumulative adoption."""
+
+from pathlib import Path
+
+from utils.mixed_adoption_trajectory import (
+    build_adoption_trajectory,
+    fetch_baseline_sample,
+)
+
+# Base data directory for NY HP rates (git-ignored raw/processed, configs versioned)
+BASE_DATA_DIR = Path("rate_design/ny/hp_rates/data")
+
+# Configuration
+CONFIG = {
+    # ResStock release parameters
+    "release_year": "2024",
+    "weather_file": "tmy3",
+    "release_version": "2",
+    "state": "NY",
+    # Heat pump upgrade ID (adjust based on your ResStock release)
+    "hp_upgrade_id": "1",
+    # Download settings
+    "output_dir": BASE_DATA_DIR / "buildstock_raw",
+    "max_workers": 5,
+    # Sampling settings
+    "sample_size": 1000,  # Number of buildings to sample
+    "sample_seed": 123,  # Seed for sampling reproducibility (determines building ordering)
+    # Adoption scenario settings
+    "adoption_fractions": [0.1, 0.2, 0.3, 0.5, 0.8, 1.0],
+    # Output settings
+    "processed_dir": BASE_DATA_DIR / "buildstock_processed",
+}
+
+
+def main():
+    """Run the complete workflow to generate adoption scenarios."""
+    print("=" * 80)
+    print("NY Heat Pump Cumulative Adoption Scenario Generator")
+    print("=" * 80)
+    print("\nConfiguration:")
+    for key, value in CONFIG.items():
+        print(f"  {key}: {value}")
+    print("\n")
+
+    # Step 1: Fetch baseline sample and establish building ID ordering
+    print("\n" + "=" * 80)
+    print("STEP 1: Fetching baseline sample")
+    print("=" * 80)
+    print(f"Fetching {CONFIG['sample_size']} baseline buildings (seed={CONFIG['sample_seed']})")
+
+    baseline_metadata_path, building_ids = fetch_baseline_sample(
+        sample_size=CONFIG["sample_size"],
+        random_seed=CONFIG["sample_seed"],
+        release_year=CONFIG["release_year"],
+        weather_file=CONFIG["weather_file"],
+        release_version=CONFIG["release_version"],
+        state=CONFIG["state"],
+        output_dir=CONFIG["output_dir"],
+        max_workers=CONFIG["max_workers"],
+    )
+
+    print(f"\n✓ Fetched {len(building_ids)} baseline buildings")
+    print(f"✓ Baseline metadata: {baseline_metadata_path}")
+    print("✓ Building ID ordering established (deterministic from seed)")
+
+    # Step 2: Build adoption trajectory
+    print("\n" + "=" * 80)
+    print("STEP 2: Building adoption trajectory")
+    print("=" * 80)
+    print(f"Creating scenarios for adoption fractions: {CONFIG['adoption_fractions']}")
+    print("Note: Upgrade data will be fetched incrementally for each fraction")
+
+    scenario_paths = build_adoption_trajectory(
+        baseline_metadata_path=baseline_metadata_path,
+        baseline_building_ids=building_ids,
+        adoption_fractions=CONFIG["adoption_fractions"],
+        upgrade_id=CONFIG["hp_upgrade_id"],
+        release_year=CONFIG["release_year"],
+        weather_file=CONFIG["weather_file"],
+        release_version=CONFIG["release_version"],
+        state=CONFIG["state"],
+        output_dir=CONFIG["output_dir"],
+        max_workers=CONFIG["max_workers"],
+        output_processed_dir=CONFIG["processed_dir"],
+    )
+
+    # Summary
+    print("\n" + "=" * 80)
+    print("COMPLETE - Scenario Summary")
+    print("=" * 80)
+    print(f"\nGenerated {len(scenario_paths)} adoption scenarios:")
+    for fraction, path in sorted(scenario_paths.items()):
+        n_adopters = int(round(fraction * len(building_ids)))
+        print(f"  {fraction*100:3.0f}% adoption ({n_adopters:4d} buildings) → {path.name}")
+
+
+if __name__ == "__main__":
+    main()
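Editor's note: the key design point in the generator above is that each fraction maps to a prefix of the same seeded building ordering, so adopter sets are nested. A small sketch of the arithmetic under the CONFIG values shown:

```python
# For sample_size=1000, each adoption fraction selects the first
# int(round(fraction * sample_size)) IDs of the seeded ordering, so every
# scenario's adopter set contains the previous one (cumulative adoption).
sample_size = 1000
for fraction in [0.1, 0.2, 0.3, 0.5, 0.8, 1.0]:
    n_adopters = int(round(fraction * sample_size))
    print(f"{fraction:>4.0%} adoption -> first {n_adopters} building IDs")
# 10% -> first 100, 20% -> first 200, ..., 100% -> all 1000
```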
diff --git a/src/rate_design/ny/utils/hp_customer_selection.py b/rate_design/ny/hp_rates/scripts/hp_customer_selection.py
similarity index 100%
rename from src/rate_design/ny/utils/hp_customer_selection.py
rename to rate_design/ny/hp_rates/scripts/hp_customer_selection.py
diff --git a/src/rate_design/ny/utils/ny_case_paths.py b/rate_design/ny/hp_rates/scripts/ny_case_paths.py
similarity index 100%
rename from src/rate_design/ny/utils/ny_case_paths.py
rename to rate_design/ny/hp_rates/scripts/ny_case_paths.py
diff --git a/src/rate_design/ny/utils/ny_tariff_builder.py b/rate_design/ny/hp_rates/scripts/ny_tariff_builder.py
similarity index 100%
rename from src/rate_design/ny/utils/ny_tariff_builder.py
rename to rate_design/ny/hp_rates/scripts/ny_tariff_builder.py
diff --git a/scripts/run_ny_hp_rate_scenario.sh b/rate_design/ny/hp_rates/scripts/run_ny_hp_rate_scenario.sh
similarity index 100%
rename from scripts/run_ny_hp_rate_scenario.sh
rename to rate_design/ny/hp_rates/scripts/run_ny_hp_rate_scenario.sh
diff --git a/src/rate_design/core/__init__.py b/src/rate_design/core/__init__.py
deleted file mode 100644
index 64e5ca7..0000000
--- a/src/rate_design/core/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Shared, jurisdiction-agnostic logic for the rate design platform."""
diff --git a/src/rate_design/core/pipeline.py b/src/rate_design/core/pipeline.py
deleted file mode 100644
index b555fdb..0000000
--- a/src/rate_design/core/pipeline.py
+++ /dev/null
@@ -1,3 +0,0 @@
-"""Stub module for orchestrating the end-to-end rate design pipeline."""
-
-# TODO: wire together ingestion, transformation, and outputs.
diff --git a/src/rate_design/core/postprocess.py b/src/rate_design/core/postprocess.py
deleted file mode 100644
index dc56c6e..0000000
--- a/src/rate_design/core/postprocess.py
+++ /dev/null
@@ -1,3 +0,0 @@
-"""Stub module for postprocessing metrics and reporting."""
-
-# TODO: calculate KPIs, export summaries, and prepare artifacts.
diff --git a/src/rate_design/core/rates.py b/src/rate_design/core/rates.py
deleted file mode 100644
index aedb0e2..0000000
--- a/src/rate_design/core/rates.py
+++ /dev/null
@@ -1,3 +0,0 @@
-"""Stub module for rate definitions."""
-
-# TODO: implement rate object structures and helpers.
diff --git a/src/rate_design/core/scenarios.py b/src/rate_design/core/scenarios.py
deleted file mode 100644
index be636c6..0000000
--- a/src/rate_design/core/scenarios.py
+++ /dev/null
@@ -1,3 +0,0 @@
-"""Stub module for scenario composition and configuration."""
-
-# TODO: add scenario models and configuration loaders.
diff --git a/src/rate_design/ny/Justfile b/src/rate_design/ny/Justfile
deleted file mode 100644
index 03a3e18..0000000
--- a/src/rate_design/ny/Justfile
+++ /dev/null
@@ -1 +0,0 @@
-# NY-specific tasks go here (placeholder).
diff --git a/src/rate_design/ny/utils/__init__.py b/src/rate_design/ny/utils/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/src/rate_design/utils/buildstock_io.py b/src/rate_design/utils/buildstock_io.py
deleted file mode 100644
index 531a1c1..0000000
--- a/src/rate_design/utils/buildstock_io.py
+++ /dev/null
@@ -1,3 +0,0 @@
-"""Stub IO helpers for working with BuildStock artifacts in data/ny/."""
-
-# TODO: add load/save helpers for raw and processed BuildStock data.
diff --git a/tests/test_utils/__init__.py b/tests/test_utils/__init__.py
new file mode 100644
index 0000000..a7cc2b4
--- /dev/null
+++ b/tests/test_utils/__init__.py
@@ -0,0 +1 @@
+"""Tests for utils modules."""
diff --git a/tests/test_utils/test_buildstock_io.py b/tests/test_utils/test_buildstock_io.py
new file mode 100644
index 0000000..1aa098d
--- /dev/null
+++ b/tests/test_utils/test_buildstock_io.py
@@ -0,0 +1,191 @@
+"""Tests for utils/buildstock_io.py.
+
+Each test function corresponds to one key function in buildstock_io.py.
+Tests that interact with buildstock-fetch save outputs to tests/test_outputs/buildstock_io/ (git-ignored).
+Path construction tests are self-contained and don't require fixtures or API calls.
+"""
+
+from pathlib import Path
+
+import pytest
+
+from utils.buildstock_io import (
+    fetch_for_building_ids,
+    fetch_sample,
+    get_buildstock_release_dir,
+    get_load_curve_dir,
+    get_load_curve_path,
+    get_metadata_path,
+)
+
+# Test output directory (git-ignored)
+TEST_OUTPUT_DIR = Path(__file__).parent.parent / "test_outputs" / "buildstock_io"
+
+
+# ==============================================================================
+# Unit tests - No fixtures or API calls required
+# ==============================================================================
+
+
+def test_get_buildstock_release_dir():
+    """Test get_buildstock_release_dir constructs correct directory path."""
+    output_dir = Path("/data/resstock")
+    release_dir = get_buildstock_release_dir(
+        output_dir=output_dir,
+        release_year="2024",
+        weather_file="tmy3",
+        release_version="2",
+    )
+    assert release_dir == output_dir / "res_2024_tmy3_2"
+
+
+def test_get_metadata_path():
+    """Test get_metadata_path constructs correct metadata file path."""
+    output_dir = Path("/data/resstock")
+    metadata_path = get_metadata_path(
+        output_dir=output_dir,
+        release_year="2024",
+        weather_file="tmy3",
+        release_version="2",
+        upgrade_id="0",
+        state="NY",
+    )
+    expected = output_dir / "res_2024_tmy3_2" / "metadata" / "state=NY" / "upgrade=00" / "metadata.parquet"
+    assert metadata_path == expected
+
+
+def test_get_load_curve_dir():
+    """Test get_load_curve_dir constructs correct load curve directory path."""
+    output_dir = Path("/data/resstock")
+    load_curve_dir = get_load_curve_dir(
+        output_dir=output_dir,
+        release_year="2024",
+        weather_file="tmy3",
+        release_version="2",
+        curve_subdir="load_curve_hourly",
+    )
+    expected = output_dir / "res_2024_tmy3_2" / "load_curve_hourly"
+    assert load_curve_dir == expected
+
+
+def test_get_load_curve_path():
+    """Test get_load_curve_path constructs correct load curve file path."""
+    load_curve_dir = Path("/data/resstock/load_curve_hourly")
+    path = get_load_curve_path(
+        load_curve_dir=load_curve_dir,
+        bldg_id=12345,
+        state="NY",
+        upgrade_id="0",
+    )
+    expected = load_curve_dir / "state=NY" / "upgrade=00" / "12345-0.parquet"
+    assert path == expected
+
+
+# ==============================================================================
+# Integration tests - Require buildstock-fetch and generate test outputs
+# ==============================================================================
+
+
+@pytest.mark.integration
+def test_fetch_sample():
+    """Test fetch_sample with sample_size=1.
+
+    Outputs saved to: tests/test_outputs/buildstock_io/sample_1/
+    """
+    output_dir = TEST_OUTPUT_DIR / "sample_1"
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    paths, failed = fetch_sample(
+        upgrade_id="0",
+        release_year="2024",
+        weather_file="tmy3",
+        release_version="2",
+        state="NY",
+        output_dir=output_dir,
+        max_workers=1,
+        sample_size=1,
+        random_seed=42,
+        file_type=("metadata", "load_curve_hourly"),
+    )
+
+    assert len(failed) == 0
+    assert len(paths) > 0
+
+    metadata_path = get_metadata_path(
+        output_dir=output_dir,
+        release_year="2024",
+        weather_file="tmy3",
+        release_version="2",
+        upgrade_id="0",
+        state="NY",
+    )
+    assert metadata_path.exists()
+
+    import polars as pl
+
+    metadata = pl.read_parquet(metadata_path)
+    assert len(metadata) == 1
+
+
+@pytest.mark.integration
+def test_fetch_for_building_ids():
+    """Test fetch_for_building_ids with 1 specific building.
+
+    Outputs saved to: tests/test_outputs/buildstock_io/specific_building/
+    """
+    baseline_dir = TEST_OUTPUT_DIR / "baseline_for_upgrade_test"
+    baseline_dir.mkdir(parents=True, exist_ok=True)
+
+    _, _ = fetch_sample(
+        upgrade_id="0",
+        release_year="2024",
+        weather_file="tmy3",
+        release_version="2",
+        state="NY",
+        output_dir=baseline_dir,
+        max_workers=1,
+        sample_size=1,
+        random_seed=42,
+        file_type=("metadata",),
+    )
+
+    import polars as pl
+
+    metadata_path = get_metadata_path(
+        output_dir=baseline_dir,
+        release_year="2024",
+        weather_file="tmy3",
+        release_version="2",
+        upgrade_id="0",
+        state="NY",
+    )
+    metadata = pl.read_parquet(metadata_path)
+    building_id = metadata["bldg_id"][0]
+
+    output_dir = TEST_OUTPUT_DIR / "specific_building"
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    paths, failed = fetch_for_building_ids(
+        building_ids=[building_id],
+        upgrade_id="1",
+        release_year="2024",
+        weather_file="tmy3",
+        release_version="2",
+        state="NY",
+        output_dir=output_dir,
+        max_workers=1,
+        file_type=("metadata", "load_curve_hourly"),
+    )
+
+    assert len(failed) == 0
+    assert len(paths) > 0
+
+    upgrade_metadata_path = get_metadata_path(
+        output_dir=output_dir,
+        release_year="2024",
+        weather_file="tmy3",
+        release_version="2",
+        upgrade_id="1",
+        state="NY",
+    )
+    assert upgrade_metadata_path.exists()
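Editor's note: the four unit tests above pin down the on-disk convention without touching the network: release dir `res_{year}_{weather}_{version}`, zero-padded `upgrade=NN` partition directories, and unpadded upgrade IDs in load-curve filenames. A pure-pathlib restatement of what they assert:

```python
# Mirror of the layout asserted by the unit tests, using only pathlib.
from pathlib import Path

output_dir = Path("/data/resstock")
release_dir = output_dir / "res_2024_tmy3_2"  # res_{year}_{weather}_{version}
metadata = release_dir / "metadata" / "state=NY" / "upgrade=00" / "metadata.parquet"
load_curve = release_dir / "load_curve_hourly" / "state=NY" / "upgrade=00" / "12345-0.parquet"
# Note the asymmetry: the directory pads the upgrade ID ("upgrade=00")
# while the filename keeps it unpadded ("12345-0.parquet").
print(metadata, load_curve, sep="\n")
```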
diff --git a/tests/test_utils/test_mixed_adoption_trajectory.py b/tests/test_utils/test_mixed_adoption_trajectory.py
new file mode 100644
index 0000000..29461d0
--- /dev/null
+++ b/tests/test_utils/test_mixed_adoption_trajectory.py
@@ -0,0 +1,224 @@
+"""Tests for utils/mixed_adoption_trajectory.py.
+
+Each test function corresponds to one key function in mixed_adoption_trajectory.py.
+Integration tests generate outputs to tests/test_outputs/mixed_adoption_trajectory/ (git-ignored).
+
+Test creates a complete adoption trajectory with:
+- Sample size: 10 buildings
+- Adoption fractions: 10% (1 building), 20% (2 buildings)
+"""
+
+from pathlib import Path
+
+import polars as pl
+import pytest
+
+from utils.mixed_adoption_trajectory import (
+    build_adoption_trajectory,
+    create_mixed_loads,
+    create_mixed_metadata,
+    fetch_baseline_sample,
+)
+
+# Test output directory (git-ignored)
+TEST_OUTPUT_DIR = Path(__file__).parent.parent / "test_outputs" / "mixed_adoption_trajectory"
+
+
+# ==============================================================================
+# Integration tests - Require buildstock-fetch and generate test outputs
+# ==============================================================================
+
+
+@pytest.mark.integration
+def test_fetch_baseline_sample():
+    """Test fetch_baseline_sample with sample_size=10.
+
+    Outputs saved to: tests/test_outputs/mixed_adoption_trajectory/baseline_10/
+    """
+    output_dir = TEST_OUTPUT_DIR / "baseline_10"
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    metadata_path, building_ids = fetch_baseline_sample(
+        sample_size=10,
+        random_seed=42,
+        release_year="2024",
+        weather_file="tmy3",
+        release_version="2",
+        state="NY",
+        output_dir=output_dir,
+        max_workers=5,
+    )
+
+    assert metadata_path.exists()
+    assert len(building_ids) == 10
+    assert all(isinstance(bid, int) for bid in building_ids)
+
+    metadata = pl.read_parquet(metadata_path)
+    assert len(metadata) == 10
+    assert metadata["bldg_id"].to_list() == building_ids
+
+
+@pytest.mark.integration
+def test_create_mixed_metadata():
+    """Test create_mixed_metadata with 2 adopters out of 10 buildings.
+
+    Requires baseline data from test_fetch_baseline_sample.
+    """
+    baseline_dir = TEST_OUTPUT_DIR / "baseline_10"
+
+    from utils.buildstock_io import get_metadata_path
+
+    baseline_metadata_path = get_metadata_path(
+        output_dir=baseline_dir,
+        release_year="2024",
+        weather_file="tmy3",
+        release_version="2",
+        upgrade_id="0",
+        state="NY",
+    )
+
+    if not baseline_metadata_path.exists():
+        pytest.skip("Baseline data not found - run test_fetch_baseline_sample first")
+
+    baseline_metadata = pl.read_parquet(baseline_metadata_path)
+    building_ids = baseline_metadata["bldg_id"].to_list()
+    adopter_ids = building_ids[:2]
+
+    mixed_metadata = create_mixed_metadata(
+        baseline_metadata=baseline_metadata,
+        upgrade_metadata_path=baseline_metadata_path,
+        adopter_ids=adopter_ids,
+    )
+
+    assert len(mixed_metadata) == 10
+    assert "adopted" in mixed_metadata.columns
+    assert mixed_metadata["adopted"].sum() == 2
+
+    for bid in adopter_ids:
+        adopted_value = mixed_metadata.filter(pl.col("bldg_id") == bid)["adopted"][0]
+        assert adopted_value == 1
+
+
+@pytest.mark.integration
+def test_create_mixed_loads():
+    """Test create_mixed_loads with 2 adopters out of 10 buildings.
+
+    Requires baseline data from test_fetch_baseline_sample.
+    """
+    baseline_dir = TEST_OUTPUT_DIR / "baseline_10"
+
+    from utils.buildstock_io import get_load_curve_dir, get_metadata_path
+
+    baseline_metadata_path = get_metadata_path(
+        output_dir=baseline_dir,
+        release_year="2024",
+        weather_file="tmy3",
+        release_version="2",
+        upgrade_id="0",
+        state="NY",
+    )
+
+    if not baseline_metadata_path.exists():
+        pytest.skip("Baseline data not found - run test_fetch_baseline_sample first")
+
+    baseline_metadata = pl.read_parquet(baseline_metadata_path)
+    building_ids = baseline_metadata["bldg_id"].to_list()
+
+    load_curve_dir = get_load_curve_dir(
+        output_dir=baseline_dir,
+        release_year="2024",
+        weather_file="tmy3",
+        release_version="2",
+        curve_subdir="load_curve_hourly",
+    )
+
+    adopter_ids = building_ids[:2]
+
+    mixed_loads = create_mixed_loads(
+        building_ids=building_ids,
+        adopter_ids=adopter_ids,
+        load_curve_dir=load_curve_dir,
+        state="NY",
+        upgrade_id="0",
+    )
+
+    assert "bldg_id" in mixed_loads.columns
+    unique_bldgs = mixed_loads["bldg_id"].unique().to_list()
+    assert len(unique_bldgs) == 10
+    assert set(unique_bldgs) == set(building_ids)
+
+
+@pytest.mark.integration
+def test_build_adoption_trajectory():
+    """Test complete build_adoption_trajectory with 10% and 20% adoption.
+
+    Full integration test:
+    1. Fetches 10 baseline buildings
+    2. Creates 10% adoption scenario (1 building)
+    3. Creates 20% adoption scenario (2 buildings)
+    4. Verifies cumulative property
+
+    Outputs saved to: tests/test_outputs/mixed_adoption_trajectory/scenarios/
+    """
+    baseline_dir = TEST_OUTPUT_DIR / "full_test_baseline"
+    baseline_dir.mkdir(parents=True, exist_ok=True)
+
+    metadata_path, building_ids = fetch_baseline_sample(
+        sample_size=10,
+        random_seed=42,
+        release_year="2024",
+        weather_file="tmy3",
+        release_version="2",
+        state="NY",
+        output_dir=baseline_dir,
+        max_workers=5,
+    )
+
+    output_processed_dir = TEST_OUTPUT_DIR / "scenarios"
+    output_processed_dir.mkdir(parents=True, exist_ok=True)
+
+    scenario_paths = build_adoption_trajectory(
+        baseline_metadata_path=metadata_path,
+        baseline_building_ids=building_ids,
+        adoption_fractions=[0.1, 0.2],
+        upgrade_id="1",
+        release_year="2024",
+        weather_file="tmy3",
+        release_version="2",
+        state="NY",
+        output_dir=baseline_dir,
+        max_workers=5,
+        output_processed_dir=output_processed_dir,
+    )
+
+    # Verify scenarios created
+    assert 0.1 in scenario_paths
+    assert 0.2 in scenario_paths
+    assert scenario_paths[0.1].exists()
+    assert scenario_paths[0.2].exists()
+
+    # Load scenarios
+    scenario_10 = pl.read_parquet(scenario_paths[0.1])
+    scenario_20 = pl.read_parquet(scenario_paths[0.2])
+
+    # Verify structure
+    assert "bldg_id" in scenario_10.columns
+    assert "adopted" in scenario_10.columns
+    assert "bldg_id" in scenario_20.columns
+    assert "adopted" in scenario_20.columns
+
+    # Count adopters (group_by is the current polars spelling of groupby)
+    adopters_10 = scenario_10.group_by("bldg_id").agg(pl.col("adopted").first())
+    adopters_20 = scenario_20.group_by("bldg_id").agg(pl.col("adopted").first())
+
+    n_adopters_10 = adopters_10["adopted"].sum()
+    n_adopters_20 = adopters_20["adopted"].sum()
+
+    assert n_adopters_10 == 1
+    assert n_adopters_20 == 2
+
+    # Verify cumulative property
+    adopter_ids_10 = set(adopters_10.filter(pl.col("adopted") == 1)["bldg_id"].to_list())
+    adopter_ids_20 = set(adopters_20.filter(pl.col("adopted") == 1)["bldg_id"].to_list())
+
+    assert adopter_ids_10.issubset(adopter_ids_20)
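Editor's note: the subset assertion at the end of the trajectory test is the key invariant of this patch. On synthetic data it reduces to the following toy polars check (column names match the tests; the data is invented purely for illustration):

```python
# Toy illustration of the cumulative-adoption invariant checked above:
# every adopter at 10% must still be an adopter at 20%.
import polars as pl

ids = list(range(10))
scenario_10 = pl.DataFrame({"bldg_id": ids, "adopted": [1] + [0] * 9})
scenario_20 = pl.DataFrame({"bldg_id": ids, "adopted": [1, 1] + [0] * 8})

adopters_10 = set(scenario_10.filter(pl.col("adopted") == 1)["bldg_id"].to_list())
adopters_20 = set(scenario_20.filter(pl.col("adopted") == 1)["bldg_id"].to_list())
assert adopters_10.issubset(adopters_20)
```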
diff --git a/src/rate_design/utils/__init__.py b/utils/__init__.py
similarity index 100%
rename from src/rate_design/utils/__init__.py
rename to utils/__init__.py
diff --git a/src/rate_design/utils/bsf_to_cairo.py b/utils/bsf_to_cairo.py
similarity index 100%
rename from src/rate_design/utils/bsf_to_cairo.py
rename to utils/bsf_to_cairo.py
diff --git a/utils/buildstock_io.py b/utils/buildstock_io.py
new file mode 100644
index 0000000..66feda6
--- /dev/null
+++ b/utils/buildstock_io.py
@@ -0,0 +1,198 @@
+"""IO helpers for working with BuildStock artifacts."""
+
+from pathlib import Path
+from typing import Optional
+
+import numpy as np
+from buildstock_fetch.main import BuildingID, fetch_bldg_data, fetch_bldg_ids
+
+
+def get_buildstock_release_dir(
+    output_dir: Path,
+    release_year: str = "2024",
+    weather_file: str = "tmy3",
+    release_version: str = "2",
+) -> Path:
+    """Return buildstock-fetch output directory for a release.
+
+    Follows the naming convention:
+        {output_dir}/res_{release_year}_{weather_file}_{release_version}/
+
+    Example: tests/test_data/res_2024_tmy3_2/
+    """
+    release_name = f"res_{release_year}_{weather_file}_{release_version}"
+    return output_dir / release_name
+
+
+def get_metadata_path(
+    output_dir: Path,
+    release_year: str = "2024",
+    weather_file: str = "tmy3",
+    release_version: str = "2",
+    upgrade_id: str = "0",
+    state: str = "NY",
+) -> Path:
+    """Return path to metadata parquet for a specific upgrade.
+
+    Structure: {release_dir}/metadata/state={state}/upgrade={upgrade_id}/metadata.parquet
+    Upgrade ID is zero-padded to 2 digits (e.g., "0" -> "00", "1" -> "01")
+    """
+    release_dir = get_buildstock_release_dir(output_dir, release_year, weather_file, release_version)
+    upgrade_padded = f"{int(upgrade_id):02d}"
+    return release_dir / "metadata" / f"state={state}" / f"upgrade={upgrade_padded}" / "metadata.parquet"
+
+
+def get_load_curve_dir(
+    output_dir: Path,
+    release_year: str = "2024",
+    weather_file: str = "tmy3",
+    release_version: str = "2",
+    curve_subdir: str = "load_curve_hourly",
+) -> Path:
+    """Return directory containing load curves (hourly or 15-minute)."""
+    release_dir = get_buildstock_release_dir(output_dir, release_year, weather_file, release_version)
+    return release_dir / curve_subdir
+
+
+def get_load_curve_path(
+    load_curve_dir: Path,
+    bldg_id: int,
+    state: str = "NY",
+    upgrade_id: str = "0",
+) -> Path:
+    """Return path to load curve parquet file for a specific building/upgrade.
+
+    Structure: {load_curve_dir}/state={state}/upgrade={upgrade_id}/{bldg_id}-{upgrade_id_unpadded}.parquet
+    Upgrade directory is zero-padded to 2 digits, but filename uses unpadded ID.
+    Example: state=NY/upgrade=00/352381-0.parquet
+    """
+    upgrade_padded = f"{int(upgrade_id):02d}"
+    filename = f"{bldg_id}-{upgrade_id}.parquet"
+    return load_curve_dir / f"state={state}" / f"upgrade={upgrade_padded}" / filename
+
+
+# ------------------------------------------------------------------------------
+# BuildStock-fetch interface
+# ------------------------------------------------------------------------------
+
+def fetch_sample(
+    *,
+    upgrade_id: str = "0",
+    release_year: str = "2024",
+    weather_file: str = "tmy3",
+    release_version: str = "2",
+    state: str = "NY",
+    output_dir: Path = Path("./rate_design/ny/hp_rates/data/buildstock_raw"),
+    max_workers: int = 5,
+    sample_size: Optional[int] = None,
+    random_seed: Optional[int] = None,
+    file_type: tuple[str, ...] = ("metadata", "load_curve_hourly"),
+) -> tuple[list[Path], list[str]]:
+    """Fetch a sample of N buildings for a given upgrade via buildstock-fetch.
+
+    This is a generic interface for fetching any upgrade data. Optionally samples
+    a random subset of buildings for faster iteration.
+
+    Args:
+        upgrade_id: Upgrade ID to fetch (e.g., "0" for baseline, "1" for upgrade)
+        release_year: ResStock release year (e.g., "2024")
+        weather_file: Weather file type (e.g., "tmy3")
+        release_version: Release version number (e.g., "2")
+        state: State abbreviation (e.g., "NY")
+        output_dir: Directory to save downloaded files
+        max_workers: Number of parallel download workers
+        sample_size: Optional number of buildings to sample (None = all buildings)
+        random_seed: Random seed for sampling reproducibility
+        file_type: Tuple of file types to fetch (e.g., ("metadata", "load_curve_hourly"))
+
+    Returns:
+        Tuple of (downloaded_paths, failed_building_ids)
+    """
+    output_dir = Path(output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # Fetch all available building IDs for this upgrade
+    bldg_ids = fetch_bldg_ids(
+        product="resstock",
+        release_year=release_year,
+        weather_file=weather_file,
+        release_version=release_version,
+        state=state,
+        upgrade_id=upgrade_id,
+    )
+
+    # Sample if requested
+    if sample_size is not None and sample_size < len(bldg_ids):
+        rng = np.random.default_rng(random_seed)
+        all_bldg_ids = [bid.bldg_id for bid in bldg_ids]
+        sampled_ids = set(rng.choice(all_bldg_ids, size=sample_size, replace=False))
+        bldg_ids = [bid for bid in bldg_ids if bid.bldg_id in sampled_ids]
+
+    # Fetch data
+    paths, failed = fetch_bldg_data(
+        bldg_ids=bldg_ids,
+        file_type=file_type,
+        output_dir=output_dir,
+        max_workers=max_workers,
+    )
+
+    return paths, failed
+
+
+def fetch_for_building_ids(
+    *,
+    building_ids: list[int],
+    upgrade_id: str,
+    release_year: str = "2024",
+    weather_file: str = "tmy3",
+    release_version: str = "2",
+    state: str = "NY",
+    output_dir: Path = Path("./rate_design/ny/hp_rates/data/buildstock_raw"),
+    max_workers: int = 5,
+    file_type: tuple[str, ...] = ("metadata", "load_curve_hourly"),
+) -> tuple[list[Path], list[str]]:
+    """Fetch data for specific building IDs from a given upgrade.
+
+    This is useful when you already know which buildings you want to fetch
+    (e.g., a subset selected for upgrade adoption).
+
+    Args:
+        building_ids: List of building IDs to fetch
+        upgrade_id: Upgrade ID to fetch (e.g., "0" for baseline, "1" for upgrade)
+        release_year: ResStock release year (e.g., "2024")
+        weather_file: Weather file type (e.g., "tmy3")
+        release_version: Release version number (e.g., "2")
+        state: State abbreviation (e.g., "NY")
+        output_dir: Directory to save downloaded files
+        max_workers: Number of parallel download workers
+        file_type: Tuple of file types to fetch (e.g., ("metadata", "load_curve_hourly"))
+
+    Returns:
+        Tuple of (downloaded_paths, failed_building_ids)
+    """
+    output_dir = Path(output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # Convert integer building IDs to BuildingID objects
+    bldg_id_objects = [
+        BuildingID(
+            bldg_id=bid,
+            release_number=release_version,
+            release_year=release_year,
+            res_com="resstock",
+            weather=weather_file,
+            upgrade_id=upgrade_id,
+            state=state,
+        )
+        for bid in building_ids
+    ]
+
+    # Fetch data
+    paths, failed = fetch_bldg_data(
+        bldg_ids=bldg_id_objects,
+        file_type=file_type,
+        output_dir=output_dir,
+        max_workers=max_workers,
+    )
+
+    return paths, failed
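Editor's note: a hedged quick-start for the two fetch helpers above, using only the signatures defined in this file. The output directory is illustrative; both calls download from the ResStock release via buildstock-fetch, so in the test suite they sit behind the `integration` marker.

```python
# Sketch: fetch a 5-building baseline sample, then upgrade data for the
# same buildings. The ./scratch path is an assumption for illustration.
from pathlib import Path

import polars as pl

from utils.buildstock_io import fetch_for_building_ids, fetch_sample, get_metadata_path

out = Path("./scratch/buildstock_raw")
paths, failed = fetch_sample(
    upgrade_id="0", state="NY", output_dir=out,
    sample_size=5, random_seed=42, file_type=("metadata",),
)

# Read back the sampled IDs, then re-fetch the same buildings under upgrade 1.
meta = pl.read_parquet(get_metadata_path(output_dir=out, upgrade_id="0", state="NY"))
fetch_for_building_ids(
    building_ids=meta["bldg_id"].to_list(), upgrade_id="1", state="NY",
    output_dir=out, file_type=("metadata",),
)
```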
diff --git a/utils/mixed_adoption_trajectory.py b/utils/mixed_adoption_trajectory.py
new file mode 100644
index 0000000..098dca7
--- /dev/null
+++ b/utils/mixed_adoption_trajectory.py
@@ -0,0 +1,353 @@
+"""Pipeline utilities for mixed baseline/upgrade adoption trajectories.
+
+This module orchestrates mixed adoption scenarios where a fraction of buildings
+adopt an upgrade (e.g., heat pumps, weatherization) while others remain at baseline.
+
+Workflow:
+1. Fetch baseline sample (N buildings) → establishes deterministic ordering
+2. For each adoption fraction (10%, 20%, etc.):
+   - Select first x% of building IDs from ordering
+   - Fetch upgrade data ONLY for those specific buildings
+   - Create mixed metadata (baseline + upgrade with adoption flag)
+   - Create mixed load curves (baseline + upgrade concatenated)
+"""
+
+from pathlib import Path
+
+import polars as pl
+
+from utils.buildstock_io import (
+    fetch_for_building_ids,
+    fetch_sample,
+    get_load_curve_path,
+    get_metadata_path,
+)
+
+
+# ------------------------------------------------------------------------------
+# Step 1: Fetch baseline sample
+# ------------------------------------------------------------------------------
+
+def fetch_baseline_sample(
+    *,
+    sample_size: int,
+    random_seed: int,
+    release_year: str = "2024",
+    weather_file: str = "tmy3",
+    release_version: str = "2",
+    state: str = "NY",
+    output_dir: Path,
+    max_workers: int = 5,
+) -> tuple[Path, list[int]]:
+    """Fetch baseline sample and return metadata path + deterministic building ID ordering.
+
+    This establishes the fixed ordering of buildings for all adoption fractions.
+    The ordering is determined by the random seed and remains constant across
+    all adoption scenarios.
+
+    Args:
+        sample_size: Number of buildings to sample
+        random_seed: Seed for reproducible sampling
+        release_year: ResStock release year (e.g., "2024")
+        weather_file: Weather file type (e.g., "tmy3")
+        release_version: Release version (e.g., "2")
+        state: State abbreviation (e.g., "NY")
+        output_dir: Directory to save downloaded files
+        max_workers: Number of parallel download workers
+
+    Returns:
+        Tuple of (metadata_path, building_id_ordering)
+    """
+    # Fetch baseline data (upgrade 0)
+    _, failed = fetch_sample(
+        upgrade_id="0",
+        release_year=release_year,
+        weather_file=weather_file,
+        release_version=release_version,
+        state=state,
+        output_dir=output_dir,
+        max_workers=max_workers,
+        sample_size=sample_size,
+        random_seed=random_seed,
+        file_type=("metadata", "load_curve_hourly"),
+    )
+
+    if failed:
+        print(f"Warning: {len(failed)} baseline files failed to download")
+
+    # Get metadata path and extract building ID ordering
+    metadata_path = get_metadata_path(
+        output_dir=output_dir,
+        release_year=release_year,
+        weather_file=weather_file,
+        release_version=release_version,
+        upgrade_id="0",
+        state=state,
+    )
+
+    # Read metadata to get building IDs - this is our deterministic ordering
+    metadata = pl.read_parquet(metadata_path)
+    building_ids = metadata["bldg_id"].to_list()
+
+    return metadata_path, building_ids
+
+
+# ------------------------------------------------------------------------------
+# Step 2: Build adoption trajectory
+# ------------------------------------------------------------------------------
+
+def build_adoption_trajectory(
+    *,
+    baseline_metadata_path: Path,
+    baseline_building_ids: list[int],
+    adoption_fractions: list[float],
+    upgrade_id: str = "1",
+    release_year: str = "2024",
+    weather_file: str = "tmy3",
+    release_version: str = "2",
+    state: str = "NY",
+    output_dir: Path,
+    max_workers: int = 5,
+    output_processed_dir: Path,
+) -> dict[float, Path]:
+    """Build mixed adoption scenarios for multiple adoption fractions.
+
+    For each adoption fraction:
+    1. Select first x% of building IDs from ordering
+    2. Fetch upgrade data only for those buildings
+    3. Create mixed metadata and load curves
+    4. Save to parquet
+
+    Args:
+        baseline_metadata_path: Path to baseline metadata parquet
+        baseline_building_ids: Ordered list of building IDs (from fetch_baseline_sample)
+        adoption_fractions: List of adoption fractions (e.g., [0.1, 0.2, 0.5])
+        upgrade_id: Upgrade ID to fetch (e.g., "1" for heat pump upgrade)
+        release_year: ResStock release year
+        weather_file: Weather file type
+        release_version: Release version
+        state: State abbreviation
+        output_dir: Directory where buildstock data is stored
+        max_workers: Number of parallel download workers
+        output_processed_dir: Directory to save processed mixed scenarios
+
+    Returns:
+        Dictionary mapping adoption fraction to output parquet path
+    """
+    output_processed_dir = Path(output_processed_dir)
+    output_processed_dir.mkdir(parents=True, exist_ok=True)
+
+    # Load baseline metadata (will be reused for all fractions)
+    baseline_metadata = pl.read_parquet(baseline_metadata_path)
+
+    # Track which buildings have been fetched for upgrade
+    fetched_upgrade_ids = set()
+    scenario_paths = {}
+
+    for fraction in sorted(adoption_fractions):
+        print(f"\n--- Building {fraction*100:.0f}% adoption scenario ---")
+
+        # Select first x% of buildings from ordering
+        n_adopters = int(round(fraction * len(baseline_building_ids)))
+        adopter_ids = baseline_building_ids[:n_adopters]
+
+        # Fetch upgrade data only for new adopters (not already fetched)
+        new_adopters = set(adopter_ids) - fetched_upgrade_ids
+        if new_adopters:
+            print(f"Fetching upgrade data for {len(new_adopters)} new adopters...")
+            fetch_for_building_ids(
+                building_ids=list(new_adopters),
+                upgrade_id=upgrade_id,
+                release_year=release_year,
+                weather_file=weather_file,
+                release_version=release_version,
+                state=state,
+                output_dir=output_dir,
+                max_workers=max_workers,
+                file_type=("metadata", "load_curve_hourly"),
+            )
+            fetched_upgrade_ids.update(new_adopters)
+
+        # Create mixed metadata and loads
+        output_path = output_processed_dir / f"mixed_{fraction:.2f}.parquet"
+
+        mixed_data = create_mixed_scenario(
+            baseline_metadata=baseline_metadata,
+            baseline_building_ids=baseline_building_ids,
+            adopter_ids=adopter_ids,
+            upgrade_id=upgrade_id,
+            state=state,
+            output_dir=output_dir,
+            release_year=release_year,
+            weather_file=weather_file,
+            release_version=release_version,
+        )
+
+        # Save mixed scenario
+        mixed_data.write_parquet(output_path)
+        print(f"Saved {fraction*100:.0f}% adoption scenario to {output_path}")
+
+        scenario_paths[fraction] = output_path
+
+    return scenario_paths
+
+
+# ------------------------------------------------------------------------------
+# Step 3: Create mixed scenario (metadata + loads)
+# ------------------------------------------------------------------------------
+
+def create_mixed_scenario(
+    *,
+    baseline_metadata: pl.DataFrame,
+    baseline_building_ids: list[int],
+    adopter_ids: list[int],
+    upgrade_id: str,
+    state: str,
+    output_dir: Path,
+    release_year: str,
+    weather_file: str,
+    release_version: str,
+) -> pl.DataFrame:
+    """Create a mixed scenario with metadata and load curves.
+
+    Combines baseline and upgrade data, adding an adoption flag column.
+
+    Args:
+        baseline_metadata: Baseline metadata DataFrame
+        baseline_building_ids: All building IDs in baseline
+        adopter_ids: Building IDs that adopt the upgrade
+        upgrade_id: Upgrade ID
+        state: State abbreviation
+        output_dir: BuildStock data directory
+        release_year: ResStock release year
+        weather_file: Weather file type
+        release_version: Release version
+
+    Returns:
+        DataFrame with concatenated load curves and metadata with adoption flag
+    """
+    # Get upgrade metadata path
+    upgrade_metadata_path = get_metadata_path(
+        output_dir=output_dir,
+        release_year=release_year,
+        weather_file=weather_file,
+        release_version=release_version,
+        upgrade_id=upgrade_id,
+        state=state,
+    )
+
+    # Create mixed metadata
+    mixed_metadata = create_mixed_metadata(
+        baseline_metadata=baseline_metadata,
+        upgrade_metadata_path=upgrade_metadata_path,
+        adopter_ids=adopter_ids,
+    )
+
+    # Create mixed load curves
+    from utils.buildstock_io import get_load_curve_dir
+
+    load_curve_dir = get_load_curve_dir(
+        output_dir=output_dir,
+        release_year=release_year,
+        weather_file=weather_file,
+        release_version=release_version,
+        curve_subdir="load_curve_hourly",
+    )
+
+    mixed_loads = create_mixed_loads(
+        building_ids=baseline_building_ids,
+        adopter_ids=adopter_ids,
+        load_curve_dir=load_curve_dir,
+        state=state,
+        upgrade_id=upgrade_id,
+    )
+
+    # Join loads with metadata
+    result = mixed_loads.join(mixed_metadata, on="bldg_id", how="left")
+
+    return result
+
+
+def create_mixed_metadata(
+    *,
+    baseline_metadata: pl.DataFrame,
+    upgrade_metadata_path: Path,
+    adopter_ids: list[int],
+) -> pl.DataFrame:
+    """Create mixed metadata with adoption flag.
+
+    Takes baseline metadata and replaces rows for adopters with upgrade metadata,
+    adding an 'adopted' flag column (0 for baseline, 1 for upgrade).
+
+    Args:
+        baseline_metadata: Baseline metadata DataFrame
+        upgrade_metadata_path: Path to upgrade metadata parquet
+        adopter_ids: List of building IDs that adopted the upgrade
+
+    Returns:
+        Mixed metadata DataFrame with adoption flag
+    """
+    # Read upgrade metadata (only for adopters)
+    if adopter_ids:
+        upgrade_metadata = pl.read_parquet(upgrade_metadata_path)
+        upgrade_metadata = upgrade_metadata.filter(pl.col("bldg_id").is_in(adopter_ids))
+    else:
+        upgrade_metadata = pl.DataFrame()
+
+    # Start with baseline, add adoption flag (0 = not adopted)
+    mixed = baseline_metadata.with_columns(pl.lit(0).alias("adopted"))
+
+    # Replace adopter rows with upgrade metadata if any
+    if not upgrade_metadata.is_empty():
+        # Filter out adopters from baseline
+        mixed = mixed.filter(~pl.col("bldg_id").is_in(adopter_ids))
+
+        # Add upgrade metadata with adoption flag (1 = adopted)
+        upgrade_with_flag = upgrade_metadata.with_columns(pl.lit(1).alias("adopted"))
+
+        # Concatenate
+        mixed = pl.concat([mixed, upgrade_with_flag])
+
+    return mixed
+
+
+def create_mixed_loads(
+    *,
+    building_ids: list[int],
+    adopter_ids: list[int],
+    load_curve_dir: Path,
+    state: str,
+    upgrade_id: str,
+) -> pl.DataFrame:
+    """Create mixed load curves (baseline + upgrade).
+
+    Args:
+        building_ids: All building IDs
+        adopter_ids: Building IDs that adopted the upgrade
+        load_curve_dir: Directory containing load curve parquets
+        state: State abbreviation
+        upgrade_id: Upgrade ID for adopters
+
+    Returns:
+        Concatenated load curves with bldg_id column
+    """
+    adopter_set = set(adopter_ids)
+    load_dfs = []
+
+    for bid in building_ids:
+        # Determine which upgrade to use
+        use_upgrade_id = upgrade_id if bid in adopter_set else "0"
+
+        # Get load curve path
+        load_path = get_load_curve_path(
+            load_curve_dir=load_curve_dir,
+            bldg_id=bid,
+            state=state,
+            upgrade_id=use_upgrade_id,
+        )
+
+        # Read and add building ID
+        load_df = pl.read_parquet(load_path).with_columns(pl.lit(bid).alias("bldg_id"))
+        load_dfs.append(load_df)
+
+    return pl.concat(load_dfs)
diff --git a/src/rate_design/utils/s3_sync.py b/utils/s3_sync.py
similarity index 100%
rename from src/rate_design/utils/s3_sync.py
rename to utils/s3_sync.py
diff --git a/uv.lock b/uv.lock
index ff02838..e5d8e54 100644
--- a/uv.lock
+++ b/uv.lock
@@ -222,7 +222,7 @@ name = "importlib-metadata"
 version = "8.7.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "zipp" },
+    { name = "zipp", marker = "python_full_version < '3.12'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/76/66/650a33bd90f786193e4de4b3ad86ea60b53c89b669a5c7be931fac31cdb0/importlib_metadata-8.7.0.tar.gz", hash = "sha256:d13b81ad223b890aa16c5471f2ac3056cf76c5f10f82d6f9292f0b415f389000", size = 56641, upload-time = "2025-04-27T15:29:01.736Z" }
 wheels = [
@@ -682,6 +682,9 @@ source = { editable = "." }
 dependencies = [
     { name = "buildstock-fetch" },
     { name = "cairo" },
+    { name = "numpy" },
+    { name = "polars" },
+    { name = "pyarrow" },
 ]
 
 [package.optional-dependencies]
@@ -693,6 +696,9 @@
 requires-dist = [
     { name = "buildstock-fetch" },
     { name = "cairo", git = "https://github.com/NREL/CAIRO.git?rev=tb%2Fdev_package" },
+    { name = "numpy" },
+    { name = "polars" },
+    { name = "pyarrow" },
     { name = "pytest", marker = "extra == 'dev'" },
 ]
 provides-extras = ["dev"]
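Editor's note: downstream consumers only need the processed parquet written by `build_adoption_trajectory`; each row carries a load-curve record plus the `bldg_id` and `adopted` columns joined from metadata. A hedged sketch of reading one back (the path assumes the 20% scenario produced by the generator script above, whose filenames follow `f"mixed_{fraction:.2f}.parquet"`):

```python
# Sketch: split a processed mixed scenario into adopter and baseline loads.
from pathlib import Path

import polars as pl

scenario = pl.read_parquet(
    Path("rate_design/ny/hp_rates/data/buildstock_processed/mixed_0.20.parquet")
)
adopter_loads = scenario.filter(pl.col("adopted") == 1)
baseline_loads = scenario.filter(pl.col("adopted") == 0)
print(f"{adopter_loads['bldg_id'].n_unique()} adopters, "
      f"{baseline_loads['bldg_id'].n_unique()} baseline buildings")
```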