From 66d67177dae75c100feca1336339d33616ddb466 Mon Sep 17 00:00:00 2001 From: sherryzuo <145484385+sherryzuo@users.noreply.github.com> Date: Sun, 22 Mar 2026 19:48:15 +0000 Subject: [PATCH 01/19] Add skeleton files for mixed-upgrade HP adoption trajectory pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Skeleton only — no functional implementation yet. Adds: - utils/pre/materialize_mixed_upgrade.py: CLI stub that will assign buildings to upgrades per year and write per-year metadata + load symlinks. - utils/pre/generate_adoption_scenario_yamls.py: CLI stub that will generate year-indexed scenario YAML entries from a base scenario. - rate_design/hp_rates/ny/config/adoption/nyca_electrification.yaml: NYISO Gold Book 2025 adoption trajectory config (placeholder fractions). - tests/pre/test_materialize_mixed_upgrade.py: skipped test stubs covering assignment logic, metadata combination, symlink creation, and validation. - Justfile adoption trajectory recipes: materialize-adoption, generate-adoption-scenarios, run-adoption-scenario, run-adoption-all. 
--- rate_design/hp_rates/Justfile | 63 ++++++++++++++ .../config/adoption/nyca_electrification.yaml | 25 ++++++ tests/pre/__init__.py | 0 tests/pre/test_materialize_mixed_upgrade.py | 78 +++++++++++++++++ utils/pre/generate_adoption_scenario_yamls.py | 85 +++++++++++++++++++ utils/pre/materialize_mixed_upgrade.py | 80 +++++++++++++++++ 6 files changed, 331 insertions(+) create mode 100644 rate_design/hp_rates/ny/config/adoption/nyca_electrification.yaml create mode 100644 tests/pre/__init__.py create mode 100644 tests/pre/test_materialize_mixed_upgrade.py create mode 100644 utils/pre/generate_adoption_scenario_yamls.py create mode 100644 utils/pre/materialize_mixed_upgrade.py diff --git a/rate_design/hp_rates/Justfile b/rate_design/hp_rates/Justfile index f1d230d3..0bdd6bbb 100644 --- a/rate_design/hp_rates/Justfile +++ b/rate_design/hp_rates/Justfile @@ -677,6 +677,69 @@ run-subset runs: just "run-${num}" done +# ============================================================================= +# ADOPTION TRAJECTORY (mixed-upgrade) +# ============================================================================= + +path_adoption_config_dir := path_config / "adoption" + +# Materialize per-year ResStock data for a mixed-upgrade adoption trajectory. +# Reads the adoption config YAML, assigns buildings to upgrades per year, and +# writes year=/ directories under the adoption output path. +# +# Example: +# just s ny materialize-adoption nyca_electrification +materialize-adoption config_name="default": + uv run python {{ path_repo }}/utils/pre/materialize_mixed_upgrade.py \ + --state "{{ state }}" \ + --utility "{{ utility }}" \ + --adoption-config "{{ path_adoption_config_dir }}/{{ config_name }}.yaml" \ + --path-resstock-release "{{ path_resstock_release }}" \ + --output-dir "{{ path_resstock_release }}/adoption/{{ config_name }}" + +# Generate per-year scenario YAML entries for adoption runs. 
+# Output: config/scenarios/scenarios__adoption.yaml +# +# Example: +# just s ny generate-adoption-scenarios nyca_electrification 1,2,5,6 +generate-adoption-scenarios config_name="default" runs="1,2": + uv run python {{ path_repo }}/utils/pre/generate_adoption_scenario_yamls.py \ + --base-scenario "{{ path_scenario_config }}" \ + --runs "{{ runs }}" \ + --adoption-config "{{ path_adoption_config_dir }}/{{ config_name }}.yaml" \ + --materialized-dir "{{ path_resstock_release }}/adoption/{{ config_name }}" \ + --output "{{ path_scenarios }}/scenarios_{{ utility }}_adoption.yaml" + +# Run a single adoption scenario by (year-indexed) run number. +# +# Example: +# just s ny run-adoption-scenario 101 +run-adoption-scenario run_num: + #!/usr/bin/env bash + set -euo pipefail + : "${RDP_BATCH:?Set RDP_BATCH before running}" + export RDP_BATCH + uv run python {{ path_repo }}/rate_design/hp_rates/run_scenario.py \ + --scenario-config "{{ path_scenarios }}/scenarios_{{ utility }}_adoption.yaml" \ + --run-num "{{ run_num }}" + +# Orchestrate the full adoption pipeline: materialize → generate scenarios → run all. 
+# +# Example: +# RDP_BATCH=ny_20260320_adoption just s ny run-adoption-all nyca_electrification 1,2 +run-adoption-all config_name="default" runs="1,2": + just materialize-adoption "{{ config_name }}" + just generate-adoption-scenarios "{{ config_name }}" "{{ runs }}" + #!/usr/bin/env bash + set -euo pipefail + : "${RDP_BATCH:?Set RDP_BATCH before running}" + export RDP_BATCH + IFS=',' read -ra nums <<< "{{ runs }}" + for num in "${nums[@]}"; do + echo ">> run-adoption-all: run-${num}" >&2 + just run-adoption-scenario "${num}" + done + # ============================================================================= # HELPERS # ============================================================================= diff --git a/rate_design/hp_rates/ny/config/adoption/nyca_electrification.yaml b/rate_design/hp_rates/ny/config/adoption/nyca_electrification.yaml new file mode 100644 index 00000000..a636ab45 --- /dev/null +++ b/rate_design/hp_rates/ny/config/adoption/nyca_electrification.yaml @@ -0,0 +1,25 @@ +# NYCA building electrification adoption trajectory (NYISO Gold Book 2025). +# +# Fractions represent the share of total buildings assigned to each ResStock +# upgrade per year index. Remaining buildings stay at upgrade 0 (baseline). +# Year indices map to calendar years via year_labels. +# +# Technology → ResStock upgrade mapping: +# ASHP Full Capacity → 2 (cold-climate ASHP, 90% capacity @ 5F, elec backup) +# ASHP Dual Fuel → 4 (ENERGY STAR ASHP + existing fossil backup) +# Ground Source HP → 5 (geothermal heat pump) +# Supplemental Heat → 1 (ENERGY STAR ASHP, 50% capacity @ 5F, elec backup) +# Electric Resistance → baseline upgrade 0, already captured there + +scenario_name: nyca_electrification +random_seed: 42 + +# TODO: Replace placeholder fractions with values derived from NYISO Gold Book +# 2025 NYCA electrification forecast once data extraction is complete. 
+scenario: + 2: [0.005, 0.015, 0.035, 0.060, 0.095, 0.130, 0.160] # ASHP full capacity + 4: [0.002, 0.008, 0.018, 0.030, 0.045, 0.060, 0.075] # ASHP dual fuel + 5: [0.001, 0.003, 0.005, 0.008, 0.012, 0.018, 0.025] # ground source HP + 1: [0.002, 0.006, 0.015, 0.030, 0.055, 0.090, 0.130] # supplemental heat + +year_labels: [2025, 2028, 2031, 2034, 2037, 2040, 2043] diff --git a/tests/pre/__init__.py b/tests/pre/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/pre/test_materialize_mixed_upgrade.py b/tests/pre/test_materialize_mixed_upgrade.py new file mode 100644 index 00000000..64bc9b7e --- /dev/null +++ b/tests/pre/test_materialize_mixed_upgrade.py @@ -0,0 +1,78 @@ +"""Tests for utils/pre/materialize_mixed_upgrade.py. + +TODO: implement tests — this is a skeleton stub. + +Planned coverage: +- Building assignment logic: correct fraction of buildings assigned to each + upgrade per year (monotonicity, no double-assignment). +- Metadata combination: correct columns present, correct rows per upgrade. +- Symlink creation: correct targets, correct filenames + (``{bldg_id}-{upgrade_id}.parquet``). +- Scenario CSV output: structure and values. +- Validation error paths: + - upgrade data missing on disk. + - fractions outside [0, 1]. + - total fraction > 1.0. 
+""" + +import pytest + + +@pytest.mark.skip(reason="Not yet implemented") +def test_building_assignment_fractions() -> None: + """Correct fraction of buildings is assigned to each upgrade per year.""" + raise NotImplementedError + + +@pytest.mark.skip(reason="Not yet implemented") +def test_building_assignment_monotonic() -> None: + """Buildings that adopted in year N retain their upgrade in year N+1.""" + raise NotImplementedError + + +@pytest.mark.skip(reason="Not yet implemented") +def test_metadata_combination_columns() -> None: + """Combined metadata parquet contains all required CAIRO columns.""" + raise NotImplementedError + + +@pytest.mark.skip(reason="Not yet implemented") +def test_metadata_combination_row_count() -> None: + """Each building appears exactly once in the combined metadata.""" + raise NotImplementedError + + +@pytest.mark.skip(reason="Not yet implemented") +def test_symlink_targets_correct() -> None: + """Symlinks in loads/ point to the correct upgrade's parquet file.""" + raise NotImplementedError + + +@pytest.mark.skip(reason="Not yet implemented") +def test_symlink_filenames_match_cairo_convention() -> None: + """Symlink names follow the {bldg_id}-{upgrade_id}.parquet pattern.""" + raise NotImplementedError + + +@pytest.mark.skip(reason="Not yet implemented") +def test_scenario_csv_written() -> None: + """Scenario CSV is written with bldg_id and one column per year.""" + raise NotImplementedError + + +@pytest.mark.skip(reason="Not yet implemented") +def test_missing_upgrade_directory_raises() -> None: + """Error is raised when a required upgrade directory does not exist.""" + raise NotImplementedError + + +@pytest.mark.skip(reason="Not yet implemented") +def test_invalid_fractions_raise() -> None: + """Fractions outside [0, 1] are rejected by validate_scenario().""" + raise NotImplementedError + + +@pytest.mark.skip(reason="Not yet implemented") +def test_total_fraction_exceeds_one_raises() -> None: + """Total fraction > 1.0 across 
upgrades is rejected by validate_scenario().""" + raise NotImplementedError diff --git a/utils/pre/generate_adoption_scenario_yamls.py b/utils/pre/generate_adoption_scenario_yamls.py new file mode 100644 index 00000000..3f122b3d --- /dev/null +++ b/utils/pre/generate_adoption_scenario_yamls.py @@ -0,0 +1,85 @@ +"""Generate per-year scenario YAML entries for mixed-upgrade adoption runs. + +Reads a base scenario YAML, extracts selected run configs, and emits a new +YAML file (``scenarios__adoption.yaml``) with one entry per +(year × run) combination. The per-year ``path_resstock_metadata`` and +``path_resstock_loads`` are rewritten to point at the materialized data +produced by ``materialize_mixed_upgrade.py``. ``run_name`` is also extended +with the year index and calendar year label. + +Usage +----- +:: + + uv run python utils/pre/generate_adoption_scenario_yamls.py \\ + --base-scenario rate_design/hp_rates/ri/config/scenarios/scenarios_rie.yaml \\ + --runs 1,2,5,6 \\ + --adoption-config rate_design/hp_rates/ny/config/adoption/nyca_electrification.yaml \\ + --materialized-dir /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification \\ + --output rate_design/hp_rates/ri/config/scenarios/scenarios_rie_adoption.yaml + +TODO: implement body — this is a skeleton stub. +""" + +from __future__ import annotations + +import argparse +import sys + + +def build_parser() -> argparse.ArgumentParser: + p = argparse.ArgumentParser( + description="Generate per-year scenario YAMLs for mixed-upgrade adoption runs.", + ) + p.add_argument( + "--base-scenario", + required=True, + metavar="PATH", + dest="path_base_scenario", + help="Existing scenario YAML to use as the run config template.", + ) + p.add_argument( + "--runs", + required=True, + help="Comma-separated run numbers to include (e.g. 
1,2,5,6).", + ) + p.add_argument( + "--adoption-config", + required=True, + metavar="PATH", + dest="path_adoption_config", + help="Path to adoption trajectory YAML (for year_labels and scenario_name).", + ) + p.add_argument( + "--materialized-dir", + required=True, + metavar="PATH", + dest="path_materialized_dir", + help="Root of materialized per-year data (output of materialize_mixed_upgrade).", + ) + p.add_argument( + "--output", + required=True, + metavar="PATH", + dest="path_output", + help="Path to write the generated adoption scenario YAML.", + ) + return p + + +def main(argv: list[str] | None = None) -> None: + build_parser().parse_args(argv) + # TODO: implement + # 1. Load adoption config YAML for year_labels and scenario_name. + # 2. Load base scenario YAML and extract the specified run configs. + # 3. For each year index and each run number: + # a. Copy the run config. + # b. Replace path_resstock_metadata → /year=/metadata-sb.parquet + # c. Replace path_resstock_loads → /year=/loads/ + # d. Update run_name to include year index and label. + # 4. Write combined YAML to args.path_output. + raise NotImplementedError("generate_adoption_scenario_yamls is not yet implemented") + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/utils/pre/materialize_mixed_upgrade.py b/utils/pre/materialize_mixed_upgrade.py new file mode 100644 index 00000000..d82cdc45 --- /dev/null +++ b/utils/pre/materialize_mixed_upgrade.py @@ -0,0 +1,80 @@ +"""Materialize per-year ResStock data for mixed-upgrade HP adoption trajectories. + +Reads an adoption config YAML (scenario fractions per upgrade per year), assigns +buildings to upgrades using ``buildstock_fetch.scenarios.MixedUpgradeScenario``, +and writes one directory per year containing: + +- ``metadata-sb.parquet``: combined metadata rows from the assigned upgrades. +- ``loads/``: directory of symlinks pointing each building to the correct + upgrade's load parquet (``{bldg_id}-{upgrade_id}.parquet``). 
+ +The output mirrors the layout that ``run_scenario.py`` already expects for a +single-upgrade run, so no changes are needed to the scenario runner. + +Usage +----- +:: + + uv run python utils/pre/materialize_mixed_upgrade.py \\ + --state ri \\ + --utility rie \\ + --adoption-config rate_design/hp_rates/ny/config/adoption/nyca_electrification.yaml \\ + --path-resstock-release /ebs/data/nrel/resstock/res_2024_amy2018_2_sb \\ + --output-dir /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification + +TODO: implement body — this is a skeleton stub. +""" + +from __future__ import annotations + +import argparse +import sys + + +def build_parser() -> argparse.ArgumentParser: + p = argparse.ArgumentParser( + description="Materialize per-year mixed-upgrade ResStock data for adoption trajectories.", + ) + p.add_argument( + "--state", required=True, help="Two-letter state abbreviation (e.g. ny, ri)." + ) + p.add_argument("--utility", required=True, help="Utility slug (e.g. rie, nyseg).") + p.add_argument( + "--adoption-config", + required=True, + metavar="PATH", + dest="path_adoption_config", + help="Path to adoption trajectory YAML.", + ) + p.add_argument( + "--path-resstock-release", + required=True, + help="Root path of the processed ResStock _sb release (local or s3://).", + ) + p.add_argument( + "--output-dir", + required=True, + metavar="PATH", + dest="path_output_dir", + help="Directory to write per-year materialized data.", + ) + return p + + +def main(argv: list[str] | None = None) -> None: + build_parser().parse_args(argv) + # TODO: implement + # 1. Load adoption config YAML and validate with buildstock_fetch.scenarios.validate_scenario() + # 2. Discover upgrade directories under args.path_resstock_release; error if any are missing. + # 3. Use MixedUpgradeScenario to assign buildings → upgrades per year. + # 4. For each year: + # a. Read metadata-sb.parquet from each required upgrade directory. + # b. Filter to correct buildings per upgrade. 
+ # c. Combine and write to /year=/metadata-sb.parquet. + # d. Create /year=/loads/ with symlinks per building. + # 5. Write scenario CSV (bldg_id, year_0, year_1, ...) for reference. + raise NotImplementedError("materialize_mixed_upgrade is not yet implemented") + + +if __name__ == "__main__": + sys.exit(main()) From ddf458d582fcc720369b36b5d0cbd44f5059c07e Mon Sep 17 00:00:00 2001 From: sherryzuo <145484385+sherryzuo@users.noreply.github.com> Date: Sun, 22 Mar 2026 20:19:58 +0000 Subject: [PATCH 02/19] Add git commit conventions and gh pr create pattern to AGENTS.md --- AGENTS.md | 6 ++++++ tests/pre/__init__.py | 1 + 2 files changed, 7 insertions(+) diff --git a/AGENTS.md b/AGENTS.md index 297ec687..2e263805 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -130,6 +130,12 @@ Match existing style: Ruff for formatting/lint, **ty** for type checking, dprint **LaTeX in markdown:** GitHub's MathJax renderer does not support escaped underscores inside `\text{}` (e.g. `\text{avg\_mc\_peak}` will fail). Use proper math symbols instead: `\overline{MC}_{\text{peak}}`, `MC_h`, `L_h`, etc. Bare subscripts and `\text{}` with simple words (no underscores) are fine. +## Git commits + +- **Never write commit messages via a temp file** (e.g. `/tmp/commit_msg.txt`). Pass the message directly with `-m "..."` or let the user commit manually. +- **Never add co-author trailers** (`Co-authored-by: ...`) or any other generated-by attribution to commit messages or PR bodies. +- **For `gh pr create` body**: use `--body-file -` with a shell heredoc (stdin) to avoid attribution injection — do NOT use `--body "..."` with multi-line strings or `--body-file /tmp/...`. 
Example: `gh pr create --body-file - <<'PRBODY'\n...\nPRBODY` + ## Code Quality (required before every commit) - Run `just check` — no linter errors, no type errors, no warnings diff --git a/tests/pre/__init__.py b/tests/pre/__init__.py index e69de29b..8b137891 100644 --- a/tests/pre/__init__.py +++ b/tests/pre/__init__.py @@ -0,0 +1 @@ + From ea1e49a5b1a8f4c0698c936d9d26fededcf5fed7 Mon Sep 17 00:00:00 2001 From: sherryzuo <145484385+sherryzuo@users.noreply.github.com> Date: Sun, 22 Mar 2026 20:20:12 +0000 Subject: [PATCH 03/19] Implement materialize_mixed_upgrade: per-year ResStock data for HP adoption Assigns baseline buildings to upgrades using a monotonic random-seed allocation (contiguous shuffled-rank bands, one per upgrade), then writes per-year materialized data that run_scenario.py can consume unchanged: - metadata-sb.parquet: rows from each upgrade's metadata filtered to the buildings assigned to it for that year, combined into one file. - loads/: symlinks pointing each building to the correct upgrade's load parquet ({bldg_id}-{N}.parquet), discovered from the source directory rather than hard-coding a filename format. Buildings with postprocess_group.has_hp == True in the upgrade-0 baseline are excluded from random assignment and pinned to upgrade 0 in every year (they already have a heat pump and should not be re-assigned to a new HP upgrade). Also writes scenario_assignments.csv for reference (bldg_id + one column per run year showing the assigned upgrade). Validates the adoption config with buildstock_fetch.scenarios.validate_scenario before any I/O; errors clearly if required upgrade directories are missing. 
--- utils/pre/materialize_mixed_upgrade.py | 402 +++++++++++++++++++++++-- 1 file changed, 385 insertions(+), 17 deletions(-) diff --git a/utils/pre/materialize_mixed_upgrade.py b/utils/pre/materialize_mixed_upgrade.py index d82cdc45..479191c1 100644 --- a/utils/pre/materialize_mixed_upgrade.py +++ b/utils/pre/materialize_mixed_upgrade.py @@ -1,16 +1,29 @@ """Materialize per-year ResStock data for mixed-upgrade HP adoption trajectories. Reads an adoption config YAML (scenario fractions per upgrade per year), assigns -buildings to upgrades using ``buildstock_fetch.scenarios.MixedUpgradeScenario``, -and writes one directory per year containing: +buildings to upgrades using a monotonic random-seed allocation, and writes one +directory per run year containing: - ``metadata-sb.parquet``: combined metadata rows from the assigned upgrades. - ``loads/``: directory of symlinks pointing each building to the correct - upgrade's load parquet (``{bldg_id}-{upgrade_id}.parquet``). + upgrade's load parquet (``{bldg_id}-{N}.parquet``). The output mirrors the layout that ``run_scenario.py`` already expects for a single-upgrade run, so no changes are needed to the scenario runner. +Building assignment algorithm +------------------------------ +Buildings are shuffled once using the adoption config's ``random_seed``. Each +upgrade is pre-allocated a contiguous band of slots in the shuffled order (based +on its maximum fraction across all years, which is the last year's fraction since +fractions are non-decreasing). At year *t*, the first ``int(N × f[u][t])`` +buildings in upgrade *u*'s band are assigned to that upgrade; the rest remain at +upgrade 0 (baseline). This guarantees: + +- No building is assigned to more than one upgrade at a time. +- Once a building adopts an upgrade, it never reverts (monotonicity). +- The total assigned fraction never exceeds 1.0 (enforced by ``validate_scenario``). 
+ Usage ----- :: @@ -21,14 +34,23 @@ --adoption-config rate_design/hp_rates/ny/config/adoption/nyca_electrification.yaml \\ --path-resstock-release /ebs/data/nrel/resstock/res_2024_amy2018_2_sb \\ --output-dir /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification - -TODO: implement body — this is a skeleton stub. """ from __future__ import annotations import argparse +import csv +import os import sys +import warnings +from pathlib import Path +from typing import Any + +import numpy as np +import polars as pl +import yaml + +from buildstock_fetch.scenarios import validate_scenario def build_parser() -> argparse.ArgumentParser: @@ -61,19 +83,365 @@ def build_parser() -> argparse.ArgumentParser: return p +# --------------------------------------------------------------------------- +# Adoption config helpers +# --------------------------------------------------------------------------- + + +def _load_adoption_config(path: Path) -> dict[str, Any]: + with open(path, encoding="utf-8") as f: + return yaml.safe_load(f) + + +def _parse_adoption_config( + config: dict[str, Any], +) -> tuple[str, int, dict[int, list[float]], list[int], list[int]]: + """Parse and return core fields from the adoption config. + + Returns: + (scenario_name, random_seed, scenario, year_labels, run_year_indices) + where ``run_year_indices`` are the indices into ``year_labels`` that + correspond to the years that should be materialized. + """ + scenario_name: str = config["scenario_name"] + random_seed: int = int(config.get("random_seed", 42)) + + # Keys may come from YAML as integers or strings; normalise to int. 
+ scenario_raw: dict[Any, list[float]] = config["scenario"] + scenario: dict[int, list[float]] = { + int(k): [float(v) for v in vals] for k, vals in scenario_raw.items() + } + + year_labels: list[int] = [int(y) for y in config["year_labels"]] + + run_years_raw: list[int] | None = config.get("run_years") + if run_years_raw is None: + run_year_indices = list(range(len(year_labels))) + else: + run_year_indices = [] + for yr in run_years_raw: + distances = [abs(yl - int(yr)) for yl in year_labels] + nearest_idx = int(np.argmin(distances)) + nearest_year = year_labels[nearest_idx] + if nearest_year != int(yr): + warnings.warn( + f"run_years entry {yr} not in year_labels; " + f"snapping to {nearest_year} (index {nearest_idx})", + stacklevel=2, + ) + run_year_indices.append(nearest_idx) + + return scenario_name, random_seed, scenario, year_labels, run_year_indices + + +# --------------------------------------------------------------------------- +# Path helpers +# --------------------------------------------------------------------------- + + +def _upgrade_dir_name(upgrade_id: int) -> str: + return f"upgrade={upgrade_id:02d}" + + +def _metadata_path( + path_resstock_release: Path, state_upper: str, upgrade_id: int +) -> Path: + return ( + path_resstock_release + / "metadata" + / f"state={state_upper}" + / _upgrade_dir_name(upgrade_id) + / "metadata-sb.parquet" + ) + + +def _loads_dir(path_resstock_release: Path, state_upper: str, upgrade_id: int) -> Path: + return ( + path_resstock_release + / "load_curve_hourly" + / f"state={state_upper}" + / _upgrade_dir_name(upgrade_id) + ) + + +def _check_upgrade_paths( + path_resstock_release: Path, + state_upper: str, + upgrade_ids: list[int], +) -> None: + """Raise FileNotFoundError listing all missing upgrade metadata paths.""" + missing: list[str] = [] + for uid in upgrade_ids: + p = _metadata_path(path_resstock_release, state_upper, uid) + if not p.exists(): + missing.append(str(p)) + if missing: + raise FileNotFoundError( + 
"Missing required upgrade metadata files:\n" + "\n".join(missing) + ) + + +def _check_loads_dirs( + path_resstock_release: Path, + state_upper: str, + upgrade_ids: list[int], +) -> None: + """Raise FileNotFoundError listing all missing loads directories.""" + missing: list[str] = [] + for uid in upgrade_ids: + d = _loads_dir(path_resstock_release, state_upper, uid) + if not d.is_dir(): + missing.append(str(d)) + if missing: + raise FileNotFoundError( + "Missing required loads directories:\n" + "\n".join(missing) + ) + + +# --------------------------------------------------------------------------- +# Building assignment +# --------------------------------------------------------------------------- + + +def assign_buildings( + eligible_bldg_ids: list[int], + scenario: dict[int, list[float]], + run_year_indices: list[int], + random_seed: int, +) -> dict[int, dict[int, int]]: + """Assign buildings to upgrades per run-year index. + + Only buildings that do **not** already have a heat pump should be passed + via ``eligible_bldg_ids``. Buildings already at HP in the baseline are + excluded upstream and kept pinned to upgrade 0 in all years. + + Args: + eligible_bldg_ids: Building IDs eligible for HP adoption (i.e. those + whose ``postprocess_group.has_hp`` is not True in upgrade-0 metadata). + scenario: Dict mapping upgrade_id → per-year cumulative adoption fractions. + Fractions are relative to the *total* building population, so the + caller is responsible for passing a proportionally correct subset. + run_year_indices: Indices into the scenario lists to materialise. + random_seed: Seed for reproducible shuffling. + + Returns: + ``{year_index: {bldg_id: upgrade_id}}`` — upgrade 0 means "baseline". + Only covers ``eligible_bldg_ids``; already-HP buildings are not included. 
+ """ + n_bldgs = len(eligible_bldg_ids) + if n_bldgs == 0: + return {t: {} for t in run_year_indices} + + rng = np.random.default_rng(random_seed) + bldg_array = np.array(sorted(eligible_bldg_ids), dtype=np.int64) + rng.shuffle(bldg_array) + + upgrades_sorted = sorted(scenario.keys()) + num_years = len(next(iter(scenario.values()))) + last_t = num_years - 1 + + # Pre-allocate contiguous slot ranges using the last year's fractions + # (max fractions since they are non-decreasing). Slots don't overlap, + # and since total adoption <= 1.0 the ranges all fit within [0, N). + upgrade_offsets: dict[int, int] = {} + cumulative_offset = 0 + for u in upgrades_sorted: + upgrade_offsets[u] = cumulative_offset + max_count = int(n_bldgs * scenario[u][last_t]) + cumulative_offset += max_count + + result: dict[int, dict[int, int]] = {} + for t in run_year_indices: + assignments: dict[int, int] = {int(bid): 0 for bid in bldg_array} + for u in upgrades_sorted: + count_t = int(n_bldgs * scenario[u][t]) + offset = upgrade_offsets[u] + for i in range(count_t): + assignments[int(bldg_array[offset + i])] = u + result[t] = assignments + + return result + + +# --------------------------------------------------------------------------- +# Load-file discovery +# --------------------------------------------------------------------------- + + +def _build_load_file_map(loads_dir: Path, bldg_ids: set[int]) -> dict[int, Path]: + """Scan ``loads_dir`` and return ``{bldg_id: path}`` for each matching building. + + Files are expected to be named ``{bldg_id}-{something}.parquet``. Unmatched + files and files whose bldg_id is not in ``bldg_ids`` are silently skipped. 
+ """ + result: dict[int, Path] = {} + for f in loads_dir.glob("*.parquet"): + parts = f.stem.split("-", maxsplit=1) + if not parts: + continue + try: + bldg_id = int(parts[0]) + except ValueError: + continue + if bldg_ids and bldg_id not in bldg_ids: + continue + result[bldg_id] = f + return result + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + + def main(argv: list[str] | None = None) -> None: - build_parser().parse_args(argv) - # TODO: implement - # 1. Load adoption config YAML and validate with buildstock_fetch.scenarios.validate_scenario() - # 2. Discover upgrade directories under args.path_resstock_release; error if any are missing. - # 3. Use MixedUpgradeScenario to assign buildings → upgrades per year. - # 4. For each year: - # a. Read metadata-sb.parquet from each required upgrade directory. - # b. Filter to correct buildings per upgrade. - # c. Combine and write to /year=/metadata-sb.parquet. - # d. Create /year=/loads/ with symlinks per building. - # 5. Write scenario CSV (bldg_id, year_0, year_1, ...) for reference. - raise NotImplementedError("materialize_mixed_upgrade is not yet implemented") + args = build_parser().parse_args(argv) + + path_adoption_config = Path(args.path_adoption_config) + path_resstock_release = Path(args.path_resstock_release) + path_output_dir = Path(args.path_output_dir) + state_upper = args.state.upper() + + # 1. Load and validate adoption config. 
+ config = _load_adoption_config(path_adoption_config) + scenario_name, random_seed, scenario, year_labels, run_year_indices = ( + _parse_adoption_config(config) + ) + validate_scenario(scenario) + + non_baseline_upgrades = sorted(scenario.keys()) + all_upgrades = sorted({0} | set(non_baseline_upgrades)) + + print( + f"Materialising '{scenario_name}' for state={state_upper}, " + f"utility={args.utility}" + ) + print( + f" upgrades: {all_upgrades} | " + f"years: {[year_labels[t] for t in run_year_indices]}" + ) + + # 2. Verify all required upgrade directories exist. + _check_upgrade_paths(path_resstock_release, state_upper, all_upgrades) + _check_loads_dirs(path_resstock_release, state_upper, all_upgrades) + + # 3. Load baseline metadata; split into HP-eligible and already-HP buildings. + baseline_meta_path = _metadata_path(path_resstock_release, state_upper, 0) + baseline_df = pl.read_parquet(baseline_meta_path) + all_bldg_ids: list[int] = baseline_df["bldg_id"].to_list() + + # Buildings that already heat with a heat pump in the baseline must NOT be + # re-assigned — they are pinned to upgrade 0 in every year. + has_hp_col = "postprocess_group.has_hp" + if has_hp_col in baseline_df.columns: + already_hp_mask = baseline_df[has_hp_col] == True # noqa: E712 + already_hp_bldg_ids: list[int] = baseline_df.filter(already_hp_mask)[ + "bldg_id" + ].to_list() + eligible_bldg_ids: list[int] = baseline_df.filter(~already_hp_mask)[ + "bldg_id" + ].to_list() + else: + already_hp_bldg_ids = [] + eligible_bldg_ids = all_bldg_ids + + print( + f" total buildings (upgrade 0): {len(all_bldg_ids)} " + f"({len(eligible_bldg_ids)} HP-eligible, " + f"{len(already_hp_bldg_ids)} already have HP → kept at upgrade 0)" + ) + + # 4. Assign only eligible buildings to upgrades per run year. + eligible_assignments_by_year = assign_buildings( + eligible_bldg_ids, scenario, run_year_indices, random_seed + ) + + # Merge already-HP buildings back in (pinned to upgrade 0 in all years). 
+ already_hp_baseline: dict[int, int] = {bid: 0 for bid in already_hp_bldg_ids} + assignments_by_year: dict[int, dict[int, int]] = { + t: {**eligible_assignments_by_year[t], **already_hp_baseline} + for t in run_year_indices + } + + # 5. Load all upgrade metadata DataFrames (indexed by bldg_id for fast lookup). + upgrade_dfs: dict[int, pl.DataFrame] = {0: baseline_df} + for uid in non_baseline_upgrades: + upgrade_dfs[uid] = pl.read_parquet( + _metadata_path(path_resstock_release, state_upper, uid) + ) + + path_output_dir.mkdir(parents=True, exist_ok=True) + + all_year_data: list[tuple[int, dict[int, int]]] = [] + + # 6. For each run year, write materialized metadata and load symlinks. + for t in run_year_indices: + calendar_year = year_labels[t] + year_dir = path_output_dir / f"year={calendar_year}" + year_dir.mkdir(parents=True, exist_ok=True) + + assignments = assignments_by_year[t] + + # Group buildings by their assigned upgrade for this year. + bldgs_by_upgrade: dict[int, list[int]] = {u: [] for u in all_upgrades} + for bldg_id, upgrade_id in assignments.items(): + bldgs_by_upgrade[upgrade_id].append(bldg_id) + + # Combine metadata from each upgrade, filtering to its assigned buildings. + parts: list[pl.DataFrame] = [] + for uid in all_upgrades: + bldg_ids_for_upgrade = bldgs_by_upgrade[uid] + if not bldg_ids_for_upgrade: + continue + df = upgrade_dfs[uid].filter(pl.col("bldg_id").is_in(bldg_ids_for_upgrade)) + parts.append(df) + + combined = pl.concat(parts) + combined.write_parquet(year_dir / "metadata-sb.parquet") + + # Create loads/ directory with symlinks per building. 
+ loads_out_dir = year_dir / "loads" + loads_out_dir.mkdir(exist_ok=True) + + for uid in all_upgrades: + bldg_ids_for_upgrade = bldgs_by_upgrade[uid] + if not bldg_ids_for_upgrade: + continue + src_loads_dir = _loads_dir(path_resstock_release, state_upper, uid) + bldg_ids_set = set(bldg_ids_for_upgrade) + load_map = _build_load_file_map(src_loads_dir, bldg_ids_set) + + for bldg_id in bldg_ids_for_upgrade: + src_file = load_map.get(bldg_id) + if src_file is None: + raise FileNotFoundError( + f"No load file found for bldg_id={bldg_id} in {src_loads_dir}" + ) + dst = loads_out_dir / src_file.name + if dst.is_symlink() or dst.exists(): + dst.unlink() + os.symlink(src_file.resolve(), dst) + + n_assigned = sum(len(v) for v in bldgs_by_upgrade.values()) + n_hp = n_assigned - len(bldgs_by_upgrade[0]) + print( + f" year={calendar_year}: {n_assigned} buildings " + f"({n_hp} HP-upgraded, {len(bldgs_by_upgrade[0])} baseline)" + ) + all_year_data.append((calendar_year, assignments)) + + # 7. Write scenario CSV (bldg_id, year_, ...) for reference. + csv_path = path_output_dir / "scenario_assignments.csv" + with open(csv_path, "w", newline="", encoding="utf-8") as f: + writer = csv.writer(f) + header = ["bldg_id"] + [f"year_{yr}" for yr, _ in all_year_data] + writer.writerow(header) + for bldg_id in sorted(all_bldg_ids): + row: list[object] = [bldg_id] + [asgn[bldg_id] for _, asgn in all_year_data] + writer.writerow(row) + + print(f"Wrote scenario assignments to {csv_path}") + print(f"Done. 
Materialised {len(run_year_indices)} year(s) to {path_output_dir}") if __name__ == "__main__": From 82b23c0b6452bb1f448a21885c6a4ab22d0781d2 Mon Sep 17 00:00:00 2001 From: sherryzuo <145484385+sherryzuo@users.noreply.github.com> Date: Sun, 22 Mar 2026 20:20:22 +0000 Subject: [PATCH 04/19] Implement generate_adoption_scenario_yamls: per-year adoption scenario YAML Reads a base scenario YAML and an adoption config, then emits a new scenarios__adoption.yaml with one entry per (year x run) combination. For each generated entry: - path_resstock_metadata and path_resstock_loads are rewritten to point at the materialized data produced by materialize_mixed_upgrade. - year_run is updated to the calendar year for that adoption cohort. - All path strings containing year= are updated to year= so MC paths resolve to the correct Cambium year. - run_name gets a _y_mixed tag inserted before the double- underscore tariff suffix. Run keys follow the scheme (year_index + 1) * 100 + run_num so keys are unique across (year, run) combinations and predictable when passed to run-adoption-scenario. --- utils/pre/generate_adoption_scenario_yamls.py | 225 ++++++++++++++++-- 1 file changed, 210 insertions(+), 15 deletions(-) diff --git a/utils/pre/generate_adoption_scenario_yamls.py b/utils/pre/generate_adoption_scenario_yamls.py index 3f122b3d..d64813d7 100644 --- a/utils/pre/generate_adoption_scenario_yamls.py +++ b/utils/pre/generate_adoption_scenario_yamls.py @@ -4,8 +4,20 @@ YAML file (``scenarios__adoption.yaml``) with one entry per (year × run) combination. The per-year ``path_resstock_metadata`` and ``path_resstock_loads`` are rewritten to point at the materialized data -produced by ``materialize_mixed_upgrade.py``. ``run_name`` is also extended -with the year index and calendar year label. +produced by ``materialize_mixed_upgrade.py``. ``year_run`` and all path +strings containing ``year={old_year_run}`` are also updated to the calendar +year for each generated entry. 
+ +Run keys in the output YAML use the scheme ``(year_index + 1) * 100 + run_num``: + +- Year index 0 (first run year), base run 1 → key 101 +- Year index 0, base run 2 → key 102 +- Year index 1 (second run year), base run 1 → key 201 +- Year index 1, base run 2 → key 202 +- … + +This ensures run keys are unique across (year, run) combinations and +memorable when passed to ``run-adoption-scenario``. Usage ----- @@ -17,14 +29,19 @@ --adoption-config rate_design/hp_rates/ny/config/adoption/nyca_electrification.yaml \\ --materialized-dir /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification \\ --output rate_design/hp_rates/ri/config/scenarios/scenarios_rie_adoption.yaml - -TODO: implement body — this is a skeleton stub. """ from __future__ import annotations import argparse +import copy import sys +import warnings +from pathlib import Path +from typing import Any + +import numpy as np +import yaml def build_parser() -> argparse.ArgumentParser: @@ -67,18 +84,196 @@ def build_parser() -> argparse.ArgumentParser: return p +# --------------------------------------------------------------------------- +# Adoption config helpers (mirrors materialize_mixed_upgrade logic) +# --------------------------------------------------------------------------- + + +def _load_yaml(path: Path) -> dict[str, Any]: + with open(path, encoding="utf-8") as f: + return yaml.safe_load(f) + + +def _resolve_run_years(config: dict[str, Any]) -> list[tuple[int, int]]: + """Return ``[(year_index, calendar_year), ...]`` to generate entries for. + + Uses ``run_years`` from the config when present; otherwise uses all + ``year_labels``. Snaps run_years entries to the nearest year_label when + an exact match is not found. 
+ """ + year_labels: list[int] = [int(y) for y in config["year_labels"]] + run_years_raw: list[int] | None = config.get("run_years") + + if run_years_raw is None: + return list(enumerate(year_labels)) + + result: list[tuple[int, int]] = [] + for yr in run_years_raw: + distances = [abs(yl - int(yr)) for yl in year_labels] + nearest_idx = int(np.argmin(distances)) + nearest_year = year_labels[nearest_idx] + if nearest_year != int(yr): + warnings.warn( + f"run_years entry {yr} not in year_labels; " + f"snapping to {nearest_year} (index {nearest_idx})", + stacklevel=2, + ) + result.append((nearest_idx, nearest_year)) + return result + + +# --------------------------------------------------------------------------- +# Config transformation helpers +# --------------------------------------------------------------------------- + + +def _replace_year_in_value(value: Any, old_year: int, new_year: int) -> Any: + """Recursively replace ``year={old_year}`` with ``year={new_year}`` in strings.""" + if isinstance(value, str): + return value.replace(f"year={old_year}", f"year={new_year}") + if isinstance(value, dict): + return { + k: _replace_year_in_value(v, old_year, new_year) for k, v in value.items() + } + if isinstance(value, list): + return [_replace_year_in_value(item, old_year, new_year) for item in value] + return value + + +def _insert_blank_lines_between_runs(yaml_str: str) -> str: + """Insert a blank line before run keys 2+, not before the first run key.""" + lines = yaml_str.splitlines() + out: list[str] = [] + seen_run_key = False + for line in lines: + stripped = line.strip() + is_run_key = ( + line.startswith(" ") and stripped.endswith(":") and stripped[:-1].isdigit() + ) + if is_run_key and seen_run_key and (not out or out[-1] != ""): + out.append("") + if is_run_key: + seen_run_key = True + out.append(line) + return "\n".join(out) + ("\n" if yaml_str.endswith("\n") else "") + + +def _update_run_name(run_name: str, calendar_year: int) -> str: + """Append 
+    ``_y{year}_mixed`` to a run name (before any trailing double-underscore suffix).
+
+    Examples:
+        ``ri_rie_run1_up00_precalc__flat`` → ``ri_rie_run1_up00_precalc_y2025_mixed__flat``
+        ``ny_nyseg_run5_up02_default__tou`` → ``ny_nyseg_run5_up02_default_y2025_mixed__tou``
+    """
+    # Locate the first double-underscore which separates the "stem" from the tariff suffix.
+    double_us = run_name.find("__")
+    year_tag = f"_y{calendar_year}_mixed"
+    if double_us == -1:
+        return run_name + year_tag
+    return run_name[:double_us] + year_tag + run_name[double_us:]
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+
 def main(argv: list[str] | None = None) -> None:
-    build_parser().parse_args(argv)
-    # TODO: implement
-    # 1. Load adoption config YAML for year_labels and scenario_name.
-    # 2. Load base scenario YAML and extract the specified run configs.
-    # 3. For each year index and each run number:
-    #    a. Copy the run config.
-    #    b. Replace path_resstock_metadata → /year=/metadata-sb.parquet
-    #    c. Replace path_resstock_loads → /year=/loads/
-    #    d. Update run_name to include year index and label.
-    # 4. Write combined YAML to args.path_output.
-    raise NotImplementedError("generate_adoption_scenario_yamls is not yet implemented")
+    args = build_parser().parse_args(argv)
+
+    path_base_scenario = Path(args.path_base_scenario)
+    path_adoption_config = Path(args.path_adoption_config)
+    path_materialized_dir = Path(args.path_materialized_dir)
+    path_output = Path(args.path_output)
+
+    # Parse run numbers.
+    try:
+        run_nums = [int(r.strip()) for r in args.runs.split(",") if r.strip()]
+    except ValueError as exc:
+        raise ValueError(
+            f"--runs must be comma-separated integers, got: {args.runs!r}"
+        ) from exc
+    if not run_nums:
+        raise ValueError("--runs is empty; at least one run number is required.")
+
+    # 1. Load adoption config for year info.
+ adoption_config = _load_yaml(path_adoption_config) + scenario_name: str = adoption_config["scenario_name"] + year_run_pairs = _resolve_run_years(adoption_config) + + # 2. Load base scenario YAML and extract requested run configs. + base_doc = _load_yaml(path_base_scenario) + base_runs: dict[int, dict[str, Any]] = { + int(k): v for k, v in base_doc.get("runs", {}).items() + } + + missing_runs = [r for r in run_nums if r not in base_runs] + if missing_runs: + available = sorted(base_runs.keys()) + raise KeyError( + f"Run(s) {missing_runs} not found in {path_base_scenario}. " + f"Available runs: {available}" + ) + + print( + f"Generating adoption scenario YAML for '{scenario_name}': " + f"{len(year_run_pairs)} year(s) × {len(run_nums)} run(s) = " + f"{len(year_run_pairs) * len(run_nums)} entries" + ) + + # 3. Build generated run entries. + output_runs: dict[int, dict[str, Any]] = {} + + for year_index, calendar_year in year_run_pairs: + meta_path = str( + path_materialized_dir / f"year={calendar_year}" / "metadata-sb.parquet" + ) + loads_path = str(path_materialized_dir / f"year={calendar_year}" / "loads" / "") + + for run_num in run_nums: + base_run = base_runs[run_num] + old_year_run = int(base_run.get("year_run", calendar_year)) + + # Deep-copy so base configs remain unmodified. + run_entry: dict[str, Any] = copy.deepcopy(base_run) + + # Replace ResStock data paths. + run_entry["path_resstock_metadata"] = meta_path + run_entry["path_resstock_loads"] = loads_path + + # Update year_run to the calendar year for this adoption cohort. + run_entry["year_run"] = calendar_year + + # Replace year= tokens in all string path values so MC data resolves + # to the correct Cambium year. + run_entry = _replace_year_in_value(run_entry, old_year_run, calendar_year) + + # Update run_name to include year and mixed tag. 
+ run_entry["run_name"] = _update_run_name( + str(base_run.get("run_name", f"run{run_num}")), + calendar_year, + ) + + output_key = (year_index + 1) * 100 + run_num + output_runs[output_key] = run_entry + print( + f" [{output_key}] year={calendar_year}, " + f"base_run={run_num}: {run_entry['run_name']}" + ) + + # 4. Write combined YAML. + path_output.parent.mkdir(parents=True, exist_ok=True) + payload: dict[str, Any] = {"runs": output_runs} + yaml_str = yaml.dump( + payload, + default_flow_style=False, + sort_keys=False, + allow_unicode=True, + ) + yaml_str = _insert_blank_lines_between_runs(yaml_str) + path_output.write_text(yaml_str, encoding="utf-8") + + print(f"Wrote {len(output_runs)} run entries to {path_output}") if __name__ == "__main__": From b0380f6c6b0c8ae3edc2f8dc30e80f4493341bb4 Mon Sep 17 00:00:00 2001 From: sherryzuo <145484385+sherryzuo@users.noreply.github.com> Date: Sun, 22 Mar 2026 20:49:01 +0000 Subject: [PATCH 05/19] Add fit_adoption_config.py to generate NYCA adoption trajectory from logistic fit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Digitizes NYISO Gold Book 2025 NYCA electrification forecast, fits a logistic S-curve per technology, and writes the adoption config YAML with fractions evaluated at the Cambium 5-year run years (2025–2050). 2025 is forced to 0.0 (all buildings at upgrade-0 baseline). Also saves a curve-fit diagnostic plot alongside the YAML, and wires a `fit-adoption-config` recipe into the shared hp_rates Justfile so the YAML is always script-generated rather than hand-written. 
--- rate_design/hp_rates/Justfile | 10 + .../config/adoption/nyca_electrification.yaml | 31 +- utils/pre/fit_adoption_config.py | 423 ++++++++++++++++++ 3 files changed, 455 insertions(+), 9 deletions(-) create mode 100644 utils/pre/fit_adoption_config.py diff --git a/rate_design/hp_rates/Justfile b/rate_design/hp_rates/Justfile index 0bdd6bbb..09e6423a 100644 --- a/rate_design/hp_rates/Justfile +++ b/rate_design/hp_rates/Justfile @@ -683,6 +683,16 @@ run-subset runs: path_adoption_config_dir := path_config / "adoption" +# Fit logistic S-curves to NYISO Gold Book 2025 digitized data and write the +# adoption config YAML + a curve-fit diagnostic plot. +# +# Example: +# just s ny fit-adoption-config nyca_electrification +fit-adoption-config config_name="nyca_electrification": + uv run python {{ path_repo }}/utils/pre/fit_adoption_config.py \ + --output "{{ path_adoption_config_dir }}/{{ config_name }}.yaml" \ + --plot-output "{{ path_adoption_config_dir }}/{{ config_name }}_curves.png" + # Materialize per-year ResStock data for a mixed-upgrade adoption trajectory. # Reads the adoption config YAML, assigns buildings to upgrades per year, and # writes year=/ directories under the adoption output path. diff --git a/rate_design/hp_rates/ny/config/adoption/nyca_electrification.yaml b/rate_design/hp_rates/ny/config/adoption/nyca_electrification.yaml index a636ab45..aace3313 100644 --- a/rate_design/hp_rates/ny/config/adoption/nyca_electrification.yaml +++ b/rate_design/hp_rates/ny/config/adoption/nyca_electrification.yaml @@ -1,7 +1,8 @@ # NYCA building electrification adoption trajectory (NYISO Gold Book 2025). +# Generated by utils/pre/fit_adoption_config.py — do not edit by hand. # -# Fractions represent the share of total buildings assigned to each ResStock -# upgrade per year index. Remaining buildings stay at upgrade 0 (baseline). +# Fractions represent the share of total NYCA buildings assigned to each +# ResStock upgrade at each year. 
Remaining buildings stay at upgrade 0 (baseline). # Year indices map to calendar years via year_labels. # # Technology → ResStock upgrade mapping: @@ -10,16 +11,28 @@ # Ground Source HP → 5 (geothermal heat pump) # Supplemental Heat → 1 (ENERGY STAR ASHP, 50% capacity @ 5F, elec backup) # Electric Resistance → baseline upgrade 0, already captured there +# +# Methodology: logistic S-curves f(t) = L / (1 + exp(-k * (t - t0))) fit +# (scipy curve_fit) to housing-unit counts digitized from the NYISO Gold +# Book 2025 NYCA stacked-area chart. Denominator: 7,900,000 total NYCA +# occupied housing units (Census ACS / NYISO estimate). 2025 forced to 0.0 +# (all buildings at upgrade-0 baseline). +# +# Fitted parameters: +# upgrade 2 (ASHP full capacity): L=0.2168 k=0.2169 t0=2042.8 +# upgrade 4 (ASHP dual fuel): L=0.1087 k=0.2290 t0=2040.0 +# upgrade 5 (ground source HP): L=0.0115 k=0.2633 t0=2039.3 +# upgrade 1 (supplemental heat): L=0.1281 k=0.3098 t0=2040.2 scenario_name: nyca_electrification random_seed: 42 -# TODO: Replace placeholder fractions with values derived from NYISO Gold Book -# 2025 NYCA electrification forecast once data extraction is complete. scenario: - 2: [0.005, 0.015, 0.035, 0.060, 0.095, 0.130, 0.160] # ASHP full capacity - 4: [0.002, 0.008, 0.018, 0.030, 0.045, 0.060, 0.075] # ASHP dual fuel - 5: [0.001, 0.003, 0.005, 0.008, 0.012, 0.018, 0.025] # ground source HP - 1: [0.002, 0.006, 0.015, 0.030, 0.055, 0.090, 0.130] # supplemental heat + 2: [0.0000, 0.0128, 0.0339, 0.0767, 0.1340, 0.1794] # ASHP full capacity + 4: [0.0000, 0.0100, 0.0263, 0.0544, 0.0825, 0.0987] # ASHP dual fuel + 5: [0.0000, 0.0009, 0.0028, 0.0063, 0.0094, 0.0109] # ground source HP + 1: [0.0000, 0.0052, 0.0211, 0.0617, 0.1043, 0.1222] # supplemental heat -year_labels: [2025, 2028, 2031, 2034, 2037, 2040, 2043] +# Calendar years for each scenario index (= run years). +# Aligns with Cambium 5-year MC intervals; 2025 is baseline. 
+year_labels: [2025, 2030, 2035, 2040, 2045, 2050] diff --git a/utils/pre/fit_adoption_config.py b/utils/pre/fit_adoption_config.py new file mode 100644 index 00000000..071ebecb --- /dev/null +++ b/utils/pre/fit_adoption_config.py @@ -0,0 +1,423 @@ +"""Fit logistic S-curves to digitized NYISO data and write adoption config YAML. + +Source: NYISO Gold Book 2025, "Number of Residential Households Converted to +Electric Heating By Technology (NYCA)" stacked-area chart. + +Parametric form: f(t) = L / (1 + exp(-k * (t - t0))) + L = long-run saturation fraction of all NYCA housing units + k = growth rate + t0 = inflection year + +Fractions are normalized by 7,900,000 total NYCA occupied housing units +(Census ACS / NYISO Gold Book 2025 estimate). 2025 is forced to 0.0 — all +buildings remain at upgrade-0 baseline — regardless of the logistic value. + +Technology → ResStock upgrade mapping: + ASHP Full Capacity → 2 (cold-climate ASHP, 90% capacity @ 5F, elec backup) + ASHP Dual Fuel → 4 (ENERGY STAR ASHP + existing fossil backup) + Ground Source HP → 5 (geothermal heat pump) + Supplemental Heat → 1 (ENERGY STAR ASHP, 50% capacity @ 5F, elec backup) + Electric Resistance → baseline upgrade 0, already captured there + +Usage:: + + uv run python utils/pre/fit_adoption_config.py \\ + --output rate_design/hp_rates/ny/config/adoption/nyca_electrification.yaml \\ + --plot-output rate_design/hp_rates/ny/config/adoption/nyca_electrification_curves.png +""" + +from __future__ import annotations + +import argparse +import logging +from pathlib import Path + +import numpy as np +import polars as pl +from plotnine import ( + aes, + element_line, + element_text, + geom_line, + geom_point, + geom_vline, + ggplot, + labs, + scale_color_manual, + scale_x_continuous, + scale_y_continuous, + theme, + theme_minimal, +) +from scipy.optimize import curve_fit + +from buildstock_fetch.scenarios import validate_scenario + +logging.basicConfig(level=logging.INFO, format="%(levelname)s: 
%(message)s") +log = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Source data (NYISO Gold Book 2025) +# --------------------------------------------------------------------------- + +SCENARIO_NAME = "nyca_electrification" +RANDOM_SEED = 42 + +# Total NYCA occupied housing units used as the fraction denominator. +TOTAL_HU = 7_900_000.0 + +# Digitized from the NYISO Gold Book 2025 NYCA stacked-area chart. +# Each entry: (calendar_year, individual_technology_housing_units_in_thousands). +# 2025 is forced to 0.0 by the evaluation logic below. +_RAW_DATA: dict[int, list[tuple[int, float]]] = { + 2: [ # ASHP Full Capacity + (2030, 75), + (2035, 250), + (2040, 640), + (2045, 1050), + (2050, 1400), + (2057, 1650), + ], + 4: [ # ASHP Dual Fuel + (2030, 65), + (2035, 205), + (2040, 440), + (2045, 660), + (2050, 750), + (2057, 850), + ], + 5: [ # Ground Source HP + (2030, 10), + (2035, 20), + (2040, 50), + (2045, 75), + (2050, 85), + (2057, 90), + ], + 1: [ # Supplemental Heat + (2030, 35), + (2035, 210), + (2040, 440), + (2045, 870), + (2050, 950), + (2057, 1000), + ], +} + +_UPGRADE_LABELS: dict[int, str] = { + 2: "ASHP full capacity", + 4: "ASHP dual fuel", + 5: "ground source HP", + 1: "supplemental heat", +} + +# Wong colorblind-friendly palette matched to NYISO chart hues. 
+_UPGRADE_COLORS: dict[int, str] = { + 2: "#D55E00", # vermillion / orange + 4: "#999999", # gray + 5: "#0072B2", # blue + 1: "#009E73", # green +} + +_DEFAULT_RUN_YEARS: list[int] = [2025, 2030, 2035, 2040, 2045, 2050] + +# --------------------------------------------------------------------------- +# Logistic model +# --------------------------------------------------------------------------- + + +def _logistic(t: np.ndarray, L: float, k: float, t0: float) -> np.ndarray: + return L / (1.0 + np.exp(-k * (t - t0))) + + +def _fit_logistic(years: np.ndarray, fracs: np.ndarray) -> tuple[float, float, float]: + """Fit logistic to (years, fracs); return (L, k, t0).""" + L_min = float(fracs.max()) * 1.01 + p0 = [fracs.max() * 1.5, 0.10, 2045.0] + bounds = ([L_min, 0.001, 2020.0], [1.0, 1.0, 2080.0]) + popt, _ = curve_fit(_logistic, years, fracs, p0=p0, bounds=bounds, maxfev=20_000) + return float(popt[0]), float(popt[1]), float(popt[2]) + + +# --------------------------------------------------------------------------- +# Fit +# --------------------------------------------------------------------------- + + +def fit_all( + run_years: list[int], +) -> tuple[dict[int, list[float]], dict[int, tuple[float, float, float]]]: + """Fit logistic curves; return ``(scenario_fracs, params)``. + + ``scenario_fracs[upgrade_id][i]`` is the adoption fraction at + ``run_years[i]``. 2025 is forced to ``0.0``. 
+ """ + scenario: dict[int, list[float]] = {} + params: dict[int, tuple[float, float, float]] = {} + + for upgrade_id, pts in _RAW_DATA.items(): + years_arr = np.array([y for y, _ in pts], dtype=float) + fracs_arr = np.array([hu * 1_000 / TOTAL_HU for _, hu in pts]) + + L, k, t0 = _fit_logistic(years_arr, fracs_arr) + params[upgrade_id] = (L, k, t0) + log.info( + "upgrade %d (%s): L=%.4f k=%.4f t0=%.1f", + upgrade_id, + _UPGRADE_LABELS[upgrade_id], + L, + k, + t0, + ) + + year_fracs: list[float] = [] + for yr in run_years: + if yr <= 2025: + year_fracs.append(0.0) + else: + val = float(_logistic(np.array([float(yr)]), L, k, t0)[0]) + year_fracs.append(round(val, 4)) + scenario[upgrade_id] = year_fracs + + return scenario, params + + +# --------------------------------------------------------------------------- +# YAML writer +# --------------------------------------------------------------------------- + + +def write_yaml( + path_output: Path, + scenario: dict[int, list[float]], + params: dict[int, tuple[float, float, float]], + run_years: list[int], +) -> None: + """Write adoption config YAML with full methodology commentary.""" + param_block = "\n".join( + f"# upgrade {uid} ({_UPGRADE_LABELS[uid]}): " + f"L={params[uid][0]:.4f} k={params[uid][1]:.4f} t0={params[uid][2]:.1f}" + for uid in [2, 4, 5, 1] + ) + + scenario_block = "\n".join( + f" {uid}: [{', '.join(f'{v:.4f}' for v in scenario[uid])}]" + f" # {_UPGRADE_LABELS[uid]}" + for uid in [2, 4, 5, 1] + ) + + year_labels_str = "[" + ", ".join(str(y) for y in run_years) + "]" + + lines = [ + "# NYCA building electrification adoption trajectory (NYISO Gold Book 2025).", + "# Generated by utils/pre/fit_adoption_config.py — do not edit by hand.", + "#", + "# Fractions represent the share of total NYCA buildings assigned to each", + "# ResStock upgrade at each year. 
Remaining buildings stay at upgrade 0 (baseline).", + "# Year indices map to calendar years via year_labels.", + "#", + "# Technology → ResStock upgrade mapping:", + "# ASHP Full Capacity → 2 (cold-climate ASHP, 90% capacity @ 5F, elec backup)", + "# ASHP Dual Fuel → 4 (ENERGY STAR ASHP + existing fossil backup)", + "# Ground Source HP → 5 (geothermal heat pump)", + "# Supplemental Heat → 1 (ENERGY STAR ASHP, 50% capacity @ 5F, elec backup)", + "# Electric Resistance → baseline upgrade 0, already captured there", + "#", + "# Methodology: logistic S-curves f(t) = L / (1 + exp(-k * (t - t0))) fit", + "# (scipy curve_fit) to housing-unit counts digitized from the NYISO Gold", + f"# Book 2025 NYCA stacked-area chart. Denominator: {TOTAL_HU:,.0f} total NYCA", + "# occupied housing units (Census ACS / NYISO estimate). 2025 forced to 0.0", + "# (all buildings at upgrade-0 baseline).", + "#", + "# Fitted parameters:", + param_block, + "", + f"scenario_name: {SCENARIO_NAME}", + f"random_seed: {RANDOM_SEED}", + "", + "scenario:", + scenario_block, + "", + "# Calendar years for each scenario index (= run years).", + "# Aligns with Cambium 5-year MC intervals; 2025 is baseline.", + f"year_labels: {year_labels_str}", + "", + ] + + path_output.parent.mkdir(parents=True, exist_ok=True) + path_output.write_text("\n".join(lines), encoding="utf-8") + log.info("wrote %s", path_output) + + +# --------------------------------------------------------------------------- +# Plot +# --------------------------------------------------------------------------- + +_PLOT_YEARS_DENSE = np.linspace(2024, 2058, 400) + + +def make_plot( + params: dict[int, tuple[float, float, float]], + run_years: list[int], + path_plot: Path, +) -> None: + """Save a plotnine figure: continuous logistic curves + digitized points.""" + # Build long-format DataFrame for fitted curves. 
+ curve_rows: list[dict] = [] + for uid, (L, k, t0) in params.items(): + fracs = _logistic(_PLOT_YEARS_DENSE, L, k, t0) + for yr, frac in zip(_PLOT_YEARS_DENSE, fracs): + pct = float(frac) * 100.0 + # Clip negative values that arise from logistic tails near 2024. + curve_rows.append( + { + "year": float(yr), + "technology": _UPGRADE_LABELS[uid], + "pct": max(pct, 0.0), + } + ) + + curves_df = pl.DataFrame(curve_rows) + + # Build long-format DataFrame for digitized source points (excluding 2025=0). + point_rows: list[dict] = [] + for uid, pts in _RAW_DATA.items(): + for yr, hu_k in pts: + point_rows.append( + { + "year": float(yr), + "technology": _UPGRADE_LABELS[uid], + "pct": hu_k * 1_000 / TOTAL_HU * 100.0, + } + ) + + points_df = pl.DataFrame(point_rows) + + # Ordered technology names for the legend (matches NYISO chart order, bottom→top). + tech_order = [_UPGRADE_LABELS[uid] for uid in [2, 4, 5, 1]] + color_map = {_UPGRADE_LABELS[uid]: _UPGRADE_COLORS[uid] for uid in [2, 4, 5, 1]} + + # Convert to pandas for plotnine; use pandas Categorical for legend order. 
+ import pandas as pd # noqa: PLC0415 + + curves_pd = curves_df.to_pandas() + curves_pd["technology"] = pd.Categorical( + curves_pd["technology"], categories=tech_order + ) + + points_pd = points_df.to_pandas() + points_pd["technology"] = pd.Categorical( + points_pd["technology"], categories=tech_order + ) + + vlines_df = pl.DataFrame( + {"year": [float(y) for y in run_years if y > 2025]} + ).to_pandas() + + p = ( + ggplot() + + geom_vline( + data=vlines_df, + mapping=aes(xintercept="year"), + color="#cccccc", + linetype="dashed", + size=0.5, + ) + + geom_line( + data=curves_pd, + mapping=aes(x="year", y="pct", color="technology"), + size=1.0, + ) + + geom_point( + data=points_pd, + mapping=aes(x="year", y="pct", color="technology"), + size=2.5, + shape="o", + fill="white", + stroke=1.2, + ) + + scale_color_manual(values=color_map, breaks=tech_order) + + scale_x_continuous( + breaks=list(range(2025, 2060, 5)), + limits=(2024, 2058), + ) + + scale_y_continuous( + labels=lambda x: [f"{v:.0f}%" for v in x], + ) + + labs( + title="NYCA HP adoption trajectory — NYISO Gold Book 2025 logistic fit", + x="Year", + y="Share of NYCA housing units", + color="Technology", + ) + + theme_minimal() + + theme( + plot_title=element_text(size=11), + axis_title=element_text(size=10), + legend_title=element_text(size=9), + legend_text=element_text(size=9), + panel_grid_minor=element_line(size=0), + ) + ) + + path_plot.parent.mkdir(parents=True, exist_ok=True) + p.save(str(path_plot), dpi=150, width=9, height=5) + log.info("wrote %s", path_plot) + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description="Fit NYISO adoption S-curves and write adoption config YAML.", + ) + parser.add_argument( + "--output", + required=True, + metavar="PATH", + dest="path_output", + 
help="Destination YAML path (e.g. rate_design/hp_rates/ny/config/adoption/nyca_electrification.yaml).", + ) + parser.add_argument( + "--plot-output", + metavar="PATH", + dest="path_plot", + default=None, + help="Optional path for the curve-fit diagnostic plot (.png).", + ) + parser.add_argument( + "--run-years", + metavar="YEARS", + default=",".join(str(y) for y in _DEFAULT_RUN_YEARS), + help=( + "Comma-separated calendar years to evaluate and write to the YAML. " + f"Default: {','.join(str(y) for y in _DEFAULT_RUN_YEARS)}" + ), + ) + return parser + + +def main() -> None: + args = build_parser().parse_args() + run_years = [int(y.strip()) for y in args.run_years.split(",")] + + scenario, params = fit_all(run_years) + + validate_scenario({uid: scenario[uid] for uid in scenario}) + + # Log per-year totals. + for i, yr in enumerate(run_years): + total = sum(scenario[uid][i] for uid in scenario) + log.info("year %d: total fraction = %.4f", yr, total) + + write_yaml(Path(args.path_output), scenario, params, run_years) + + if args.path_plot: + make_plot(params, run_years, Path(args.path_plot)) + + +if __name__ == "__main__": + main() From d4f0d06026c61a13903cc29a59aa1661ba0d45f3 Mon Sep 17 00:00:00 2001 From: sherryzuo <145484385+sherryzuo@users.noreply.github.com> Date: Sun, 22 Mar 2026 20:54:08 +0000 Subject: [PATCH 06/19] Fix run-adoption-all: move shebang to first line (mixed regular+shebang was invalid Just syntax) --- rate_design/hp_rates/Justfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rate_design/hp_rates/Justfile b/rate_design/hp_rates/Justfile index 09e6423a..eeb8d755 100644 --- a/rate_design/hp_rates/Justfile +++ b/rate_design/hp_rates/Justfile @@ -738,12 +738,12 @@ run-adoption-scenario run_num: # Example: # RDP_BATCH=ny_20260320_adoption just s ny run-adoption-all nyca_electrification 1,2 run-adoption-all config_name="default" runs="1,2": - just materialize-adoption "{{ config_name }}" - just generate-adoption-scenarios "{{ 
config_name }}" "{{ runs }}" #!/usr/bin/env bash set -euo pipefail : "${RDP_BATCH:?Set RDP_BATCH before running}" export RDP_BATCH + just materialize-adoption "{{ config_name }}" + just generate-adoption-scenarios "{{ config_name }}" "{{ runs }}" IFS=',' read -ra nums <<< "{{ runs }}" for num in "${nums[@]}"; do echo ">> run-adoption-all: run-${num}" >&2 From 93fd6b645804c1102652512124fa781b7bd5034e Mon Sep 17 00:00:00 2001 From: sherryzuo <145484385+sherryzuo@users.noreply.github.com> Date: Sun, 22 Mar 2026 21:40:54 +0000 Subject: [PATCH 07/19] Wire adoption trajectory recipes into shared Justfile - Fix run-adoption-scenario: add required --state, --output-dir, and run log (consistent with run-scenario) - Fix run-adoption-all: iterate over year-indexed keys (year_index+1)*100+run_num instead of base run nums; reads run_years count from adoption config via inline Python - Update generate-adoption-scenarios and run-adoption-all defaults to runs="1,2,5,6" - fit-adoption-config: wire --stacked-plot-output arg - data/resstock: add convenience shortcuts and end-to-end recipe for adoption-trajectory upgrades 01, 04, 05 (NY) --- data/resstock/Justfile | 28 ++++++++++ rate_design/hp_rates/Justfile | 39 +++++++++++--- utils/pre/fit_adoption_config.py | 88 +++++++++++++++++++++++++++++++- 3 files changed, 146 insertions(+), 9 deletions(-) diff --git a/data/resstock/Justfile b/data/resstock/Justfile index 1f35a8d3..deeef363 100644 --- a/data/resstock/Justfile +++ b/data/resstock/Justfile @@ -304,3 +304,31 @@ create-sb-release-for-upgrade-02-RI: sudo aws s3 sync s3://data.sb/nrel/resstock/res_2024_amy2018_2_sb/ /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/ just add-monthly-loads RI "00 02" just upload-monthly-loads RI "00 02" + +# ============================================================================= +# Adoption trajectory upgrades (01, 04, 05): convenience shortcuts and end-to-end recipes +# ============================================================================= 
+ +adjust-mf-electricity-NY-upgrade-01: + just adjust-mf-electricity NY res_2024_amy2018_2 res_2024_amy2018_2_sb "01" + +adjust-mf-electricity-NY-upgrade-04: + just adjust-mf-electricity NY res_2024_amy2018_2 res_2024_amy2018_2_sb "04" + +adjust-mf-electricity-NY-upgrade-05: + just adjust-mf-electricity NY res_2024_amy2018_2 res_2024_amy2018_2_sb "05" + +# Copy, adjust loads, and sync upgrades 01, 04, 05 into the _sb release for NY. +# Assumes prepare-metadata-ny has already been run (it processes all upgrades 00-05). +# metadata_utility (utility assignment) is upgrade-independent and is not re-copied here. +create-sb-release-for-adoption-upgrades-NY: + just copy-resstock-data-2024-amy2018-2-NY "01 04 05" "metadata load_curve_hourly" + just approximate-non-hp-load NY 01 res_2024_amy2018_2 res_2024_amy2018_2_sb 15 True True + just approximate-non-hp-load NY 04 res_2024_amy2018_2 res_2024_amy2018_2_sb 15 True True + just approximate-non-hp-load NY 05 res_2024_amy2018_2 res_2024_amy2018_2_sb 15 True True + just adjust-mf-electricity-NY-upgrade-01 + just adjust-mf-electricity-NY-upgrade-04 + just adjust-mf-electricity-NY-upgrade-05 + sudo aws s3 sync s3://data.sb/nrel/resstock/res_2024_amy2018_2_sb/ /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/ + just add-monthly-loads NY "01 04 05" + just upload-monthly-loads NY "01 04 05" diff --git a/rate_design/hp_rates/Justfile b/rate_design/hp_rates/Justfile index eeb8d755..c52c04b7 100644 --- a/rate_design/hp_rates/Justfile +++ b/rate_design/hp_rates/Justfile @@ -691,7 +691,8 @@ path_adoption_config_dir := path_config / "adoption" fit-adoption-config config_name="nyca_electrification": uv run python {{ path_repo }}/utils/pre/fit_adoption_config.py \ --output "{{ path_adoption_config_dir }}/{{ config_name }}.yaml" \ - --plot-output "{{ path_adoption_config_dir }}/{{ config_name }}_curves.png" + --plot-output "{{ path_adoption_config_dir }}/{{ config_name }}_curves.png" \ + --stacked-plot-output "{{ path_adoption_config_dir }}/{{ 
config_name }}_stacked.png" # Materialize per-year ResStock data for a mixed-upgrade adoption trajectory. # Reads the adoption config YAML, assigns buildings to upgrades per year, and @@ -712,7 +713,7 @@ materialize-adoption config_name="default": # # Example: # just s ny generate-adoption-scenarios nyca_electrification 1,2,5,6 -generate-adoption-scenarios config_name="default" runs="1,2": +generate-adoption-scenarios config_name="default" runs="1,2,5,6": uv run python {{ path_repo }}/utils/pre/generate_adoption_scenario_yamls.py \ --base-scenario "{{ path_scenario_config }}" \ --runs "{{ runs }}" \ @@ -721,6 +722,8 @@ generate-adoption-scenarios config_name="default" runs="1,2": --output "{{ path_scenarios }}/scenarios_{{ utility }}_adoption.yaml" # Run a single adoption scenario by (year-indexed) run number. +# Run keys use the scheme (year_index + 1) * 100 + base_run_num, matching the +# output of generate_adoption_scenario_yamls.py (e.g. 101, 102, 201, 202, ...). # # Example: # just s ny run-adoption-scenario 101 @@ -729,25 +732,45 @@ run-adoption-scenario run_num: set -euo pipefail : "${RDP_BATCH:?Set RDP_BATCH before running}" export RDP_BATCH + log_dir="${HOME}/rdp_run_logs" + mkdir -p "${log_dir}" + log_file="${log_dir}/{{ utility }}_adoption_run{{ run_num }}_${RDP_BATCH}.log" + echo ">> run-adoption-scenario {{ run_num }}: logging to ${log_file}" >&2 uv run python {{ path_repo }}/rate_design/hp_rates/run_scenario.py \ + --state "{{ state }}" \ --scenario-config "{{ path_scenarios }}/scenarios_{{ utility }}_adoption.yaml" \ - --run-num "{{ run_num }}" + --run-num "{{ run_num }}" \ + --output-dir "{{ path_outputs_base }}/${RDP_BATCH}" \ + 2>&1 | tee "${log_file}" # Orchestrate the full adoption pipeline: materialize → generate scenarios → run all. +# Iterates over all (year × run) combinations using the key scheme +# (year_index + 1) * 100 + base_run_num produced by generate_adoption_scenario_yamls.py. 
# # Example: # RDP_BATCH=ny_20260320_adoption just s ny run-adoption-all nyca_electrification 1,2 -run-adoption-all config_name="default" runs="1,2": +run-adoption-all config_name="default" runs="1,2,5,6": #!/usr/bin/env bash set -euo pipefail : "${RDP_BATCH:?Set RDP_BATCH before running}" export RDP_BATCH just materialize-adoption "{{ config_name }}" just generate-adoption-scenarios "{{ config_name }}" "{{ runs }}" - IFS=',' read -ra nums <<< "{{ runs }}" - for num in "${nums[@]}"; do - echo ">> run-adoption-all: run-${num}" >&2 - just run-adoption-scenario "${num}" + # Determine number of run years from the adoption config. + run_years_count=$(uv run python -c " +import yaml +with open('{{ path_adoption_config_dir }}/{{ config_name }}.yaml') as f: + cfg = yaml.safe_load(f) +years = cfg.get('run_years', cfg.get('year_labels', [])) +print(len(years)) +") + IFS=',' read -ra base_nums <<< "{{ runs }}" + for yi in $(seq 1 "$run_years_count"); do + for base_run in "${base_nums[@]}"; do + key=$((yi * 100 + base_run)) + echo ">> run-adoption-all: run-${key} (year_index=$((yi - 1)), base_run=${base_run})" >&2 + just run-adoption-scenario "${key}" + done done # ============================================================================= diff --git a/utils/pre/fit_adoption_config.py b/utils/pre/fit_adoption_config.py index 071ebecb..76286742 100644 --- a/utils/pre/fit_adoption_config.py +++ b/utils/pre/fit_adoption_config.py @@ -38,12 +38,14 @@ aes, element_line, element_text, + geom_area, geom_line, geom_point, geom_vline, ggplot, labs, scale_color_manual, + scale_fill_manual, scale_x_continuous, scale_y_continuous, theme, @@ -365,6 +367,80 @@ def make_plot( log.info("wrote %s", path_plot) +def make_stacked_plot( + params: dict[int, tuple[float, float, float]], + run_years: list[int], + path_plot: Path, +) -> None: + """Save a stacked area chart matching the NYISO Gold Book visual style.""" + import pandas as pd # noqa: PLC0415 + + # Stacking order bottom→top mirrors the 
NYISO chart. + stack_order = [_UPGRADE_LABELS[uid] for uid in [2, 4, 5, 1]] + fill_map = {_UPGRADE_LABELS[uid]: _UPGRADE_COLORS[uid] for uid in [2, 4, 5, 1]} + + curve_rows: list[dict] = [] + for uid, (L, k, t0) in params.items(): + fracs = _logistic(_PLOT_YEARS_DENSE, L, k, t0) + for yr, frac in zip(_PLOT_YEARS_DENSE, fracs): + curve_rows.append( + { + "year": float(yr), + "technology": _UPGRADE_LABELS[uid], + "pct": max(float(frac) * 100.0, 0.0), + } + ) + + curves_pd = pl.DataFrame(curve_rows).to_pandas() + # Reverse stack order so the first level sits at the bottom in geom_area. + curves_pd["technology"] = pd.Categorical( + curves_pd["technology"], categories=stack_order + ) + + vlines_df = pl.DataFrame( + {"year": [float(y) for y in run_years if y > 2025]} + ).to_pandas() + + p = ( + ggplot(curves_pd, aes(x="year", y="pct", fill="technology")) + + geom_area(position="stack", alpha=0.9) + + geom_vline( + data=vlines_df, + mapping=aes(xintercept="year"), + color="white", + linetype="dashed", + size=0.5, + alpha=0.7, + ) + + scale_fill_manual(values=fill_map, breaks=stack_order) + + scale_x_continuous( + breaks=list(range(2025, 2060, 5)), + limits=(2024, 2058), + ) + + scale_y_continuous( + labels=lambda x: [f"{v:.0f}%" for v in x], + ) + + labs( + title="NYCA HP adoption trajectory — NYISO Gold Book 2025 logistic fit (stacked)", + x="Year", + y="Share of NYCA housing units", + fill="Technology", + ) + + theme_minimal() + + theme( + plot_title=element_text(size=11), + axis_title=element_text(size=10), + legend_title=element_text(size=9), + legend_text=element_text(size=9), + panel_grid_minor=element_line(size=0), + ) + ) + + path_plot.parent.mkdir(parents=True, exist_ok=True) + p.save(str(path_plot), dpi=150, width=9, height=5) + log.info("wrote %s", path_plot) + + # --------------------------------------------------------------------------- # CLI # --------------------------------------------------------------------------- @@ -386,7 +462,14 @@ def build_parser() 
-> argparse.ArgumentParser: metavar="PATH", dest="path_plot", default=None, - help="Optional path for the curve-fit diagnostic plot (.png).", + help="Optional path for the curve-fit line plot (.png).", + ) + parser.add_argument( + "--stacked-plot-output", + metavar="PATH", + dest="path_stacked_plot", + default=None, + help="Optional path for the stacked area plot (.png).", ) parser.add_argument( "--run-years", @@ -418,6 +501,9 @@ def main() -> None: if args.path_plot: make_plot(params, run_years, Path(args.path_plot)) + if args.path_stacked_plot: + make_stacked_plot(params, run_years, Path(args.path_stacked_plot)) + if __name__ == "__main__": main() From 10534992bdb5ce186cfec154b29d51fd6e9732ec Mon Sep 17 00:00:00 2001 From: sherryzuo <145484385+sherryzuo@users.noreply.github.com> Date: Mon, 23 Mar 2026 00:45:59 +0000 Subject: [PATCH 08/19] Add tests for materialize_mixed_upgrade 40 tests covering building assignment fractions/monotonicity, metadata combination, symlink creation, scenario CSV output, validation error paths, config parsing, and load file map scanning. --- tests/pre/test_materialize_mixed_upgrade.py | 809 ++++++++++++++++++-- 1 file changed, 753 insertions(+), 56 deletions(-) diff --git a/tests/pre/test_materialize_mixed_upgrade.py b/tests/pre/test_materialize_mixed_upgrade.py index 64bc9b7e..1c982ed6 100644 --- a/tests/pre/test_materialize_mixed_upgrade.py +++ b/tests/pre/test_materialize_mixed_upgrade.py @@ -1,78 +1,775 @@ -"""Tests for utils/pre/materialize_mixed_upgrade.py. - -TODO: implement tests — this is a skeleton stub. - -Planned coverage: -- Building assignment logic: correct fraction of buildings assigned to each - upgrade per year (monotonicity, no double-assignment). -- Metadata combination: correct columns present, correct rows per upgrade. -- Symlink creation: correct targets, correct filenames - (``{bldg_id}-{upgrade_id}.parquet``). -- Scenario CSV output: structure and values. 
-- Validation error paths: - - upgrade data missing on disk. - - fractions outside [0, 1]. - - total fraction > 1.0. -""" +"""Tests for utils/pre/materialize_mixed_upgrade.py.""" +from __future__ import annotations + +import csv +from pathlib import Path + +import polars as pl import pytest +from buildstock_fetch.scenarios import InvalidScenarioError, validate_scenario +from utils.pre.materialize_mixed_upgrade import ( + _build_load_file_map, + _parse_adoption_config, + assign_buildings, + main, +) + +# --------------------------------------------------------------------------- +# Fixtures: minimal in-memory data +# --------------------------------------------------------------------------- + +N_BLDGS = 100 + +# Simple two-upgrade scenario with 3 years. +SCENARIO_2UP = { + 2: [0.10, 0.20, 0.30], + 4: [0.05, 0.10, 0.15], +} +RUN_YEAR_INDICES = [0, 1, 2] + + +def _bldg_ids(n: int = N_BLDGS) -> list[int]: + return list(range(1, n + 1)) + + +def _make_metadata_df( + bldg_ids: list[int], + has_hp: list[bool] | None = None, +) -> pl.DataFrame: + """Return a minimal metadata DataFrame with the columns that main() uses.""" + n = len(bldg_ids) + if has_hp is None: + has_hp = [False] * n + return pl.DataFrame( + { + "bldg_id": bldg_ids, + "postprocess_group.has_hp": has_hp, + "postprocess_group.heating_type": ["Gas"] * n, + "in.vintage_acs": ["2000s"] * n, + "applicability": [True] * n, + } + ) + + +def _write_metadata(path: Path, df: pl.DataFrame) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + df.write_parquet(path) + + +def _touch_load_file(loads_dir: Path, bldg_id: int, upgrade_id: int) -> Path: + loads_dir.mkdir(parents=True, exist_ok=True) + p = loads_dir / f"{bldg_id}-{upgrade_id:02d}.parquet" + p.touch() + return p + + +# --------------------------------------------------------------------------- +# 1. 
Building assignment: fractions +# --------------------------------------------------------------------------- + + +class TestAssignBuildingsFractions: + """assign_buildings() assigns approximately the right fraction to each upgrade.""" + + def test_correct_fraction_single_upgrade(self) -> None: + bldg_ids = _bldg_ids(200) + scenario = {2: [0.10, 0.25, 0.50]} + assignments = assign_buildings(bldg_ids, scenario, [0, 1, 2], random_seed=0) + + for t, expected_frac in zip([0, 1, 2], [0.10, 0.25, 0.50]): + assigned_to_2 = sum(1 for v in assignments[t].values() if v == 2) + assert assigned_to_2 == int(200 * expected_frac), ( + f"year index {t}: expected {int(200 * expected_frac)} buildings " + f"assigned to upgrade 2, got {assigned_to_2}" + ) + + def test_correct_fractions_two_upgrades(self) -> None: + bldg_ids = _bldg_ids(N_BLDGS) + assignments = assign_buildings( + bldg_ids, SCENARIO_2UP, RUN_YEAR_INDICES, random_seed=42 + ) + + for t in RUN_YEAR_INDICES: + for u, fracs in SCENARIO_2UP.items(): + expected = int(N_BLDGS * fracs[t]) + actual = sum(1 for v in assignments[t].values() if v == u) + assert actual == expected, ( + f"upgrade={u} year={t}: expected {expected}, got {actual}" + ) + + def test_all_buildings_covered(self) -> None: + bldg_ids = _bldg_ids(N_BLDGS) + assignments = assign_buildings( + bldg_ids, SCENARIO_2UP, RUN_YEAR_INDICES, random_seed=42 + ) + for t in RUN_YEAR_INDICES: + assert set(assignments[t].keys()) == set(bldg_ids) + + def test_remaining_buildings_stay_at_baseline(self) -> None: + """Buildings not yet assigned to any upgrade must be upgrade 0.""" + bldg_ids = _bldg_ids(N_BLDGS) + assignments = assign_buildings( + bldg_ids, SCENARIO_2UP, RUN_YEAR_INDICES, random_seed=42 + ) + for t in RUN_YEAR_INDICES: + total_hp = sum( + 1 for v in assignments[t].values() if v in SCENARIO_2UP.keys() + ) + total_baseline = sum(1 for v in assignments[t].values() if v == 0) + assert total_hp + total_baseline == N_BLDGS + + def 
test_empty_bldg_list_returns_empty(self) -> None: + result = assign_buildings([], SCENARIO_2UP, RUN_YEAR_INDICES, random_seed=0) + for t in RUN_YEAR_INDICES: + assert result[t] == {} + + def test_zero_fractions_all_baseline(self) -> None: + bldg_ids = _bldg_ids(50) + scenario = {2: [0.0, 0.0, 0.0]} + assignments = assign_buildings(bldg_ids, scenario, [0, 1, 2], random_seed=0) + for t in [0, 1, 2]: + assert all(v == 0 for v in assignments[t].values()) + + +# --------------------------------------------------------------------------- +# 2. Building assignment: monotonicity +# --------------------------------------------------------------------------- + + +class TestAssignBuildingsMonotonic: + """Buildings that adopt in year N must keep their upgrade in year N+1.""" + + def test_monotonic_adoption(self) -> None: + bldg_ids = _bldg_ids(N_BLDGS) + # Fractions increase over time — monotonic adoption. + scenario = {2: [0.10, 0.20, 0.30]} + assignments = assign_buildings(bldg_ids, scenario, [0, 1, 2], random_seed=1) + + adopted_t0 = {bid for bid, u in assignments[0].items() if u == 2} + adopted_t1 = {bid for bid, u in assignments[1].items() if u == 2} + adopted_t2 = {bid for bid, u in assignments[2].items() if u == 2} + + # Every building that adopted in year t must still have the same upgrade in t+1. + assert adopted_t0.issubset(adopted_t1), "Some t0-adopters reverted in t1" + assert adopted_t1.issubset(adopted_t2), "Some t1-adopters reverted in t2" + + def test_no_building_assigned_two_upgrades(self) -> None: + bldg_ids = _bldg_ids(N_BLDGS) + assignments = assign_buildings( + bldg_ids, SCENARIO_2UP, RUN_YEAR_INDICES, random_seed=7 + ) + for t in RUN_YEAR_INDICES: + for bid, uid in assignments[t].items(): + # At any given year each building has exactly one upgrade. 
+ assert uid in {0} | set(SCENARIO_2UP.keys()) + + def test_reproducible_with_same_seed(self) -> None: + bldg_ids = _bldg_ids(N_BLDGS) + a1 = assign_buildings(bldg_ids, SCENARIO_2UP, RUN_YEAR_INDICES, random_seed=99) + a2 = assign_buildings(bldg_ids, SCENARIO_2UP, RUN_YEAR_INDICES, random_seed=99) + assert a1 == a2 + + def test_different_seeds_differ(self) -> None: + bldg_ids = _bldg_ids(200) + a1 = assign_buildings(bldg_ids, SCENARIO_2UP, RUN_YEAR_INDICES, random_seed=1) + a2 = assign_buildings(bldg_ids, SCENARIO_2UP, RUN_YEAR_INDICES, random_seed=2) + # With 200 buildings and non-trivial fractions it is astronomically unlikely + # for both seeds to produce identical assignments. + assert a1[0] != a2[0] + + +# --------------------------------------------------------------------------- +# 3. Metadata combination — unit tests via main() +# --------------------------------------------------------------------------- + + +class TestMetadataCombination: + """Combined metadata parquet has correct columns and row count.""" + + @pytest.fixture() + def fs(self, tmp_path: Path) -> Path: + """Build a minimal on-disk fixture and return the release root.""" + release = tmp_path / "release" + bldg_ids = list(range(1, 11)) # 10 buildings + # 3 buildings already have HPs in baseline; remaining 7 are eligible. 
+ has_hp = [False] * 7 + [True] * 3 + + for uid in [0, 2]: + meta_path = ( + release + / "metadata" + / "state=NY" + / f"upgrade={uid:02d}" + / "metadata-sb.parquet" + ) + df = _make_metadata_df(bldg_ids, has_hp if uid == 0 else None) + _write_metadata(meta_path, df) + loads_dir = ( + release / "load_curve_hourly" / "state=NY" / f"upgrade={uid:02d}" + ) + for bid in bldg_ids: + _touch_load_file(loads_dir, bid, uid) + + return release + + @pytest.fixture() + def adoption_yaml(self, tmp_path: Path) -> Path: + content = ( + "scenario_name: test_scenario\n" + "random_seed: 0\n" + "scenario:\n" + " 2: [0.20, 0.40]\n" + "year_labels: [2025, 2030]\n" + ) + p = tmp_path / "adoption.yaml" + p.write_text(content, encoding="utf-8") + return p + + def test_all_required_columns_present( + self, fs: Path, adoption_yaml: Path, tmp_path: Path + ) -> None: + out_dir = tmp_path / "out" + main( + [ + "--state", + "ny", + "--utility", + "test", + "--adoption-config", + str(adoption_yaml), + "--path-resstock-release", + str(fs), + "--output-dir", + str(out_dir), + ] + ) + for year in [2025, 2030]: + df = pl.read_parquet(out_dir / f"year={year}" / "metadata-sb.parquet") + for col in [ + "bldg_id", + "postprocess_group.has_hp", + "postprocess_group.heating_type", + "in.vintage_acs", + "applicability", + ]: + assert col in df.columns, f"Missing column '{col}' for year={year}" + + def test_each_building_appears_exactly_once( + self, fs: Path, adoption_yaml: Path, tmp_path: Path + ) -> None: + out_dir = tmp_path / "out" + main( + [ + "--state", + "ny", + "--utility", + "test", + "--adoption-config", + str(adoption_yaml), + "--path-resstock-release", + str(fs), + "--output-dir", + str(out_dir), + ] + ) + for year in [2025, 2030]: + df = pl.read_parquet(out_dir / f"year={year}" / "metadata-sb.parquet") + assert df.shape[0] == 10, ( + f"year={year}: expected 10 rows, got {df.shape[0]}" + ) + assert df["bldg_id"].n_unique() == 10, f"year={year}: duplicate bldg_ids" + + def 
test_already_hp_buildings_pinned_to_baseline( + self, fs: Path, adoption_yaml: Path, tmp_path: Path + ) -> None: + """The 3 buildings that already have HP stay at upgrade-0 metadata in all years.""" + out_dir = tmp_path / "out" + main( + [ + "--state", + "ny", + "--utility", + "test", + "--adoption-config", + str(adoption_yaml), + "--path-resstock-release", + str(fs), + "--output-dir", + str(out_dir), + ] + ) + for year in [2025, 2030]: + df = pl.read_parquet(out_dir / f"year={year}" / "metadata-sb.parquet") + # Buildings 8, 9, 10 have has_hp=True in baseline (indices 7-9, bldg_ids 8-10). + already_hp_ids = [8, 9, 10] + already_hp_df = df.filter(pl.col("bldg_id").is_in(already_hp_ids)) + assert already_hp_df["postprocess_group.has_hp"].to_list() == [True] * 3 + + def test_more_hp_buildings_at_later_year( + self, fs: Path, adoption_yaml: Path, tmp_path: Path + ) -> None: + """Later years should have a higher fraction of HP buildings.""" + out_dir = tmp_path / "out" + main( + [ + "--state", + "ny", + "--utility", + "test", + "--adoption-config", + str(adoption_yaml), + "--path-resstock-release", + str(fs), + "--output-dir", + str(out_dir), + ] + ) + df_early = pl.read_parquet(out_dir / "year=2025" / "metadata-sb.parquet") + df_late = pl.read_parquet(out_dir / "year=2030" / "metadata-sb.parquet") + + # Upgrade-2 metadata has has_hp=None (defaulted to False in fixture). + # Count rows with has_hp True: comes from already-HP + newly assigned. + hp_early = df_early["postprocess_group.has_hp"].sum() + hp_late = df_late["postprocess_group.has_hp"].sum() + assert hp_late >= hp_early + + +# --------------------------------------------------------------------------- +# 4. 
Symlink creation +# --------------------------------------------------------------------------- + + +class TestSymlinkCreation: + """loads/ directory contains correctly targeted symlinks.""" + + @pytest.fixture() + def fs_and_out(self, tmp_path: Path) -> tuple[Path, Path]: + """Fixture: 5 buildings, upgrades 0 and 2.""" + release = tmp_path / "release" + bldg_ids = list(range(1, 6)) + + for uid in [0, 2]: + meta = ( + release + / "metadata" + / "state=RI" + / f"upgrade={uid:02d}" + / "metadata-sb.parquet" + ) + _write_metadata(meta, _make_metadata_df(bldg_ids)) + loads_dir = ( + release / "load_curve_hourly" / "state=RI" / f"upgrade={uid:02d}" + ) + for bid in bldg_ids: + _touch_load_file(loads_dir, bid, uid) + + adoption_yaml = tmp_path / "adoption.yaml" + adoption_yaml.write_text( + "scenario_name: test\nrandom_seed: 0\nscenario:\n 2: [0.40]\n" + "year_labels: [2025]\n", + encoding="utf-8", + ) + out_dir = tmp_path / "out" + main( + [ + "--state", + "ri", + "--utility", + "test", + "--adoption-config", + str(adoption_yaml), + "--path-resstock-release", + str(release), + "--output-dir", + str(out_dir), + ] + ) + return release, out_dir + + def test_loads_dir_exists(self, fs_and_out: tuple[Path, Path]) -> None: + _, out_dir = fs_and_out + assert (out_dir / "year=2025" / "loads").is_dir() + + def test_symlink_count_equals_building_count( + self, fs_and_out: tuple[Path, Path] + ) -> None: + _, out_dir = fs_and_out + links = list((out_dir / "year=2025" / "loads").iterdir()) + assert len(links) == 5 + + def test_symlinks_are_actual_symlinks(self, fs_and_out: tuple[Path, Path]) -> None: + _, out_dir = fs_and_out + for p in (out_dir / "year=2025" / "loads").iterdir(): + assert p.is_symlink(), f"{p} is not a symlink" + + def test_symlink_targets_exist(self, fs_and_out: tuple[Path, Path]) -> None: + release, out_dir = fs_and_out + for p in (out_dir / "year=2025" / "loads").iterdir(): + assert p.resolve().exists(), f"Dangling symlink: {p}" + + def 
test_symlink_filename_convention(self, fs_and_out: tuple[Path, Path]) -> None: + """All symlink names follow {bldg_id}-{upgrade_id}.parquet.""" + _, out_dir = fs_and_out + for p in (out_dir / "year=2025" / "loads").iterdir(): + stem = p.stem # e.g. "3-02" + parts = stem.split("-", maxsplit=1) + assert len(parts) == 2, f"Unexpected filename: {p.name}" + bldg_id_str, upgrade_str = parts + assert bldg_id_str.isdigit(), f"Non-numeric bldg_id in {p.name}" + assert upgrade_str.isdigit(), f"Non-numeric upgrade_id in {p.name}" + + def test_assigned_buildings_link_to_correct_upgrade( + self, fs_and_out: tuple[Path, Path] + ) -> None: + """Buildings assigned to upgrade 2 must symlink to upgrade=02 load files.""" + release, out_dir = fs_and_out + loads_dir = out_dir / "year=2025" / "loads" + for link in loads_dir.iterdir(): + target = link.resolve() + # The upgrade_id is encoded in the filename (e.g. "3-02.parquet"). + stem = link.stem + upgrade_str = stem.split("-", maxsplit=1)[1] + expected_upgrade_dir = f"upgrade={int(upgrade_str):02d}" + assert expected_upgrade_dir in str(target), ( + f"Symlink {link.name} → {target} does not point into {expected_upgrade_dir}" + ) + + +# --------------------------------------------------------------------------- +# 5. 
Scenario CSV output +# --------------------------------------------------------------------------- + + +class TestScenarioCsv: + """Scenario CSV is written with the correct structure.""" + + @pytest.fixture() + def out_dir(self, tmp_path: Path) -> Path: + release = tmp_path / "release" + bldg_ids = list(range(1, 9)) + + for uid in [0, 2]: + meta = ( + release + / "metadata" + / "state=NY" + / f"upgrade={uid:02d}" + / "metadata-sb.parquet" + ) + _write_metadata(meta, _make_metadata_df(bldg_ids)) + ld = release / "load_curve_hourly" / "state=NY" / f"upgrade={uid:02d}" + for bid in bldg_ids: + _touch_load_file(ld, bid, uid) + + adoption_yaml = tmp_path / "adoption.yaml" + adoption_yaml.write_text( + "scenario_name: test\nrandom_seed: 0\nscenario:\n" + " 2: [0.25, 0.50]\nyear_labels: [2025, 2030]\n", + encoding="utf-8", + ) + out = tmp_path / "out" + main( + [ + "--state", + "ny", + "--utility", + "test", + "--adoption-config", + str(adoption_yaml), + "--path-resstock-release", + str(release), + "--output-dir", + str(out), + ] + ) + return out + + def test_csv_exists(self, out_dir: Path) -> None: + assert (out_dir / "scenario_assignments.csv").exists() + + def test_csv_columns(self, out_dir: Path) -> None: + with open( + out_dir / "scenario_assignments.csv", newline="", encoding="utf-8" + ) as f: + reader = csv.reader(f) + header = next(reader) + assert header[0] == "bldg_id" + assert "year_2025" in header + assert "year_2030" in header + + def test_csv_row_count(self, out_dir: Path) -> None: + with open( + out_dir / "scenario_assignments.csv", newline="", encoding="utf-8" + ) as f: + rows = list(csv.reader(f)) + # header + 8 buildings + assert len(rows) == 9 + + def test_csv_values_are_valid_upgrade_ids(self, out_dir: Path) -> None: + with open( + out_dir / "scenario_assignments.csv", newline="", encoding="utf-8" + ) as f: + reader = csv.DictReader(f) + for row in reader: + for col in ["year_2025", "year_2030"]: + assert int(row[col]) in {0, 2}, ( + f"Unexpected 
upgrade id in CSV: {row[col]}" + ) + + def test_csv_later_year_has_more_or_equal_adopters(self, out_dir: Path) -> None: + with open( + out_dir / "scenario_assignments.csv", newline="", encoding="utf-8" + ) as f: + reader = csv.DictReader(f) + rows = list(reader) + adopters_2025 = sum(1 for r in rows if int(r["year_2025"]) != 0) + adopters_2030 = sum(1 for r in rows if int(r["year_2030"]) != 0) + assert adopters_2030 >= adopters_2025 + + +# --------------------------------------------------------------------------- +# 6. Validation error paths +# --------------------------------------------------------------------------- + + +class TestValidationErrors: + """Error conditions: missing upgrade dirs, invalid fractions, etc.""" + + def test_missing_upgrade_metadata_raises(self, tmp_path: Path) -> None: + release = tmp_path / "release" + # Only create upgrade=00 metadata; upgrade=02 is missing. + meta = release / "metadata" / "state=NY" / "upgrade=00" / "metadata-sb.parquet" + _write_metadata(meta, _make_metadata_df([1, 2])) + + adoption_yaml = tmp_path / "adoption.yaml" + adoption_yaml.write_text( + "scenario_name: t\nrandom_seed: 0\nscenario:\n 2: [0.10]\n" + "year_labels: [2025]\n", + encoding="utf-8", + ) + with pytest.raises(FileNotFoundError, match="upgrade=02"): + main( + [ + "--state", + "ny", + "--utility", + "test", + "--adoption-config", + str(adoption_yaml), + "--path-resstock-release", + str(release), + "--output-dir", + str(tmp_path / "out"), + ] + ) + + def test_missing_loads_dir_raises(self, tmp_path: Path) -> None: + release = tmp_path / "release" + # Create metadata for both upgrades but omit loads dir for upgrade=02. 
+ for uid in [0, 2]: + meta = ( + release + / "metadata" + / "state=NY" + / f"upgrade={uid:02d}" + / "metadata-sb.parquet" + ) + _write_metadata(meta, _make_metadata_df([1, 2])) + loads_dir_0 = release / "load_curve_hourly" / "state=NY" / "upgrade=00" + loads_dir_0.mkdir(parents=True) + + adoption_yaml = tmp_path / "adoption.yaml" + adoption_yaml.write_text( + "scenario_name: t\nrandom_seed: 0\nscenario:\n 2: [0.10]\n" + "year_labels: [2025]\n", + encoding="utf-8", + ) + with pytest.raises(FileNotFoundError, match="upgrade=02"): + main( + [ + "--state", + "ny", + "--utility", + "test", + "--adoption-config", + str(adoption_yaml), + "--path-resstock-release", + str(release), + "--output-dir", + str(tmp_path / "out"), + ] + ) + + def test_fractions_outside_range_raise(self) -> None: + with pytest.raises(InvalidScenarioError): + validate_scenario({2: [0.05, 1.10]}) # 1.10 > 1.0 + + def test_negative_fraction_raises(self) -> None: + with pytest.raises(InvalidScenarioError): + validate_scenario({2: [-0.01, 0.05]}) + + def test_non_monotonic_fractions_raise(self) -> None: + with pytest.raises(InvalidScenarioError): + validate_scenario({2: [0.30, 0.10]}) # decreasing + + def test_total_exceeds_one_raises(self) -> None: + with pytest.raises(InvalidScenarioError): + validate_scenario( + {2: [0.60, 0.70], 4: [0.50, 0.40]} + ) # sums to >1.0 in year 0 + + def test_missing_load_file_for_building_raises(self, tmp_path: Path) -> None: + """FileNotFoundError when a building's load file is absent from the loads dir.""" + release = tmp_path / "release" + bldg_ids = list(range(1, 4)) + + for uid in [0, 2]: + meta = ( + release + / "metadata" + / "state=NY" + / f"upgrade={uid:02d}" + / "metadata-sb.parquet" + ) + _write_metadata(meta, _make_metadata_df(bldg_ids)) + ld = release / "load_curve_hourly" / "state=NY" / f"upgrade={uid:02d}" + for bid in bldg_ids: + _touch_load_file(ld, bid, uid) + + # Remove load file for one building in upgrade=02. 
+ missing = ( + release / "load_curve_hourly" / "state=NY" / "upgrade=02" / "2-02.parquet" + ) + missing.unlink() + + adoption_yaml = tmp_path / "adoption.yaml" + # Assign enough buildings to upgrade 2 so bldg_id=2 gets assigned. + adoption_yaml.write_text( + "scenario_name: t\nrandom_seed: 0\nscenario:\n 2: [0.67]\n" + "year_labels: [2025]\n", + encoding="utf-8", + ) + # The exact building assigned depends on shuffling; we accept either a + # successful run (if bldg_id=2 was not assigned to upgrade 2) or an error. + # To force the error deterministically, assign all buildings to upgrade 2. + adoption_yaml.write_text( + "scenario_name: t\nrandom_seed: 0\nscenario:\n 2: [1.00]\n" + "year_labels: [2025]\n", + encoding="utf-8", + ) + with pytest.raises(FileNotFoundError): + main( + [ + "--state", + "ny", + "--utility", + "test", + "--adoption-config", + str(adoption_yaml), + "--path-resstock-release", + str(release), + "--output-dir", + str(tmp_path / "out"), + ] + ) -@pytest.mark.skip(reason="Not yet implemented") -def test_building_assignment_fractions() -> None: - """Correct fraction of buildings is assigned to each upgrade per year.""" - raise NotImplementedError +# --------------------------------------------------------------------------- +# 7. 
Config parsing: run_years snap + year indices +# --------------------------------------------------------------------------- -@pytest.mark.skip(reason="Not yet implemented") -def test_building_assignment_monotonic() -> None: - """Buildings that adopted in year N retain their upgrade in year N+1.""" - raise NotImplementedError +class TestParseAdoptionConfig: + """_parse_adoption_config correctly handles run_years and year snapping.""" -@pytest.mark.skip(reason="Not yet implemented") -def test_metadata_combination_columns() -> None: - """Combined metadata parquet contains all required CAIRO columns.""" - raise NotImplementedError + def test_all_years_when_run_years_omitted(self) -> None: + config = { + "scenario_name": "test", + "random_seed": 1, + "scenario": {2: [0.1, 0.2, 0.3]}, + "year_labels": [2025, 2030, 2035], + } + _, _, _, year_labels, run_year_indices = _parse_adoption_config(config) + assert run_year_indices == [0, 1, 2] + assert year_labels == [2025, 2030, 2035] + def test_run_years_subset_selects_correct_indices(self) -> None: + config = { + "scenario_name": "test", + "random_seed": 1, + "scenario": {2: [0.1, 0.2, 0.3]}, + "year_labels": [2025, 2030, 2035], + "run_years": [2025, 2035], + } + _, _, _, _, run_year_indices = _parse_adoption_config(config) + assert run_year_indices == [0, 2] -@pytest.mark.skip(reason="Not yet implemented") -def test_metadata_combination_row_count() -> None: - """Each building appears exactly once in the combined metadata.""" - raise NotImplementedError + def test_run_years_snaps_to_nearest(self) -> None: + config = { + "scenario_name": "test", + "random_seed": 1, + "scenario": {2: [0.1, 0.2, 0.3]}, + "year_labels": [2025, 2030, 2035], + "run_years": [2028], + } + with pytest.warns(UserWarning, match="snapping"): + _, _, _, _, run_year_indices = _parse_adoption_config(config) + assert run_year_indices == [1] # snaps to 2030 + def test_string_keys_normalised_to_int(self) -> None: + config = { + "scenario_name": "test", + 
"random_seed": 1, + "scenario": {"2": [0.1, 0.2], "4": [0.05, 0.10]}, + "year_labels": [2025, 2030], + } + _, _, scenario, _, _ = _parse_adoption_config(config) + assert 2 in scenario + assert 4 in scenario + assert "2" not in scenario -@pytest.mark.skip(reason="Not yet implemented") -def test_symlink_targets_correct() -> None: - """Symlinks in loads/ point to the correct upgrade's parquet file.""" - raise NotImplementedError +# --------------------------------------------------------------------------- +# 8. _build_load_file_map +# --------------------------------------------------------------------------- -@pytest.mark.skip(reason="Not yet implemented") -def test_symlink_filenames_match_cairo_convention() -> None: - """Symlink names follow the {bldg_id}-{upgrade_id}.parquet pattern.""" - raise NotImplementedError +class TestBuildLoadFileMap: + """_build_load_file_map scans a directory and returns {bldg_id: path}.""" -@pytest.mark.skip(reason="Not yet implemented") -def test_scenario_csv_written() -> None: - """Scenario CSV is written with bldg_id and one column per year.""" - raise NotImplementedError + def test_finds_matching_files(self, tmp_path: Path) -> None: + d = tmp_path / "loads" + d.mkdir() + (d / "1-02.parquet").touch() + (d / "3-02.parquet").touch() + (d / "99-02.parquet").touch() + result = _build_load_file_map(d, {1, 3, 99}) + assert set(result.keys()) == {1, 3, 99} -@pytest.mark.skip(reason="Not yet implemented") -def test_missing_upgrade_directory_raises() -> None: - """Error is raised when a required upgrade directory does not exist.""" - raise NotImplementedError + def test_filters_to_requested_bldg_ids(self, tmp_path: Path) -> None: + d = tmp_path / "loads" + d.mkdir() + (d / "1-02.parquet").touch() + (d / "2-02.parquet").touch() + (d / "3-02.parquet").touch() + result = _build_load_file_map(d, {1, 3}) + assert set(result.keys()) == {1, 3} -@pytest.mark.skip(reason="Not yet implemented") -def test_invalid_fractions_raise() -> None: - 
"""Fractions outside [0, 1] are rejected by validate_scenario().""" - raise NotImplementedError + def test_ignores_non_parquet_files(self, tmp_path: Path) -> None: + d = tmp_path / "loads" + d.mkdir() + (d / "1-02.parquet").touch() + (d / "readme.txt").touch() + result = _build_load_file_map(d, {1}) + assert set(result.keys()) == {1} -@pytest.mark.skip(reason="Not yet implemented") -def test_total_fraction_exceeds_one_raises() -> None: - """Total fraction > 1.0 across upgrades is rejected by validate_scenario().""" - raise NotImplementedError + def test_empty_directory_returns_empty(self, tmp_path: Path) -> None: + d = tmp_path / "loads" + d.mkdir() + result = _build_load_file_map(d, {1, 2}) + assert result == {} From 0eba2e7d99ae23319ed92c86822e0f4a56344485 Mon Sep 17 00:00:00 2001 From: sherryzuo <145484385+sherryzuo@users.noreply.github.com> Date: Mon, 23 Mar 2026 01:11:53 +0000 Subject: [PATCH 09/19] Add residual_cost_frac support to run_scenario.py Add `residual_cost_frac: float | None` to `ScenarioSettings`. When set, skip `utility_revenue_requirement` parsing (using a sentinel `rr_total=0.0`) and pass `residual_cost_frac` through to `_return_revenue_requirement_target`; derive `revenue_requirement` from `costs_by_type["Total System Costs ($)"]` when the call returns `None` (i.e. 0% residual = RR equals total MC). Enforce mutual exclusion: raise `ValueError` if both `residual_cost_frac` and a non-empty `utility_revenue_requirement` are present in the YAML run. Empty/none/null values for `utility_revenue_requirement` are treated as absent. Existing runs are unaffected (`residual_cost_frac` defaults to `None`). 
--- rate_design/hp_rates/run_scenario.py | 52 +++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/rate_design/hp_rates/run_scenario.py b/rate_design/hp_rates/run_scenario.py index d5249b1c..4bec325a 100644 --- a/rate_design/hp_rates/run_scenario.py +++ b/rate_design/hp_rates/run_scenario.py @@ -117,6 +117,7 @@ class ScenarioSettings: path_tou_supply_energy_mc: str | Path | None = None path_tou_supply_capacity_mc: str | Path | None = None path_supply_ancillary_mc: str | Path | None = None + residual_cost_frac: float | None = None def apply_prototype_sample( @@ -241,13 +242,34 @@ def _build_settings_from_yaml_run( _require_value(run, "run_includes_subclasses"), "run_includes_subclasses", ) - rr_config: RevenueRequirementConfig = _parse_utility_revenue_requirement( - _require_value(run, "utility_revenue_requirement"), - path_config, - raw_path_tariffs_electric, - add_supply=run_includes_supply, - run_includes_subclasses=run_includes_subclasses, - ) + residual_cost_frac_raw = run.get("residual_cost_frac") + residual_cost_frac: float | None = None + if residual_cost_frac_raw is not None: + residual_cost_frac = _parse_float(residual_cost_frac_raw, "residual_cost_frac") + urr_raw = run.get("utility_revenue_requirement") + urr_present = urr_raw is not None and str(urr_raw).strip() not in ("", "none", "null") + if residual_cost_frac is not None and urr_present: + raise ValueError( + "Specify exactly one of 'residual_cost_frac' or 'utility_revenue_requirement', " + "not both. Set 'utility_revenue_requirement: none' (or omit it) when using " + "residual_cost_frac." + ) + if residual_cost_frac is not None: + # When residual_cost_frac is set, revenue requirement is derived at runtime + # from ResStock loads × MC prices; no YAML RR file is needed. 
+ rr_config = RevenueRequirementConfig( + rr_total=0.0, + subclass_rr=None, + run_includes_subclasses=run_includes_subclasses, + ) + else: + rr_config = _parse_utility_revenue_requirement( + _require_value(run, "utility_revenue_requirement"), + path_config, + raw_path_tariffs_electric, + add_supply=run_includes_supply, + run_includes_subclasses=run_includes_subclasses, + ) path_tariff_maps_gas = _resolve_path( str(_require_value(run, "path_tariff_maps_gas")), path_config, @@ -322,6 +344,7 @@ def _build_settings_from_yaml_run( path_supply_ancillary_mc=path_supply_ancillary_mc if run_includes_supply else None, + residual_cost_frac=residual_cost_frac, ) @@ -674,13 +697,24 @@ def run(settings: ScenarioSettings, num_workers: int | None = None) -> None: ) = _return_revenue_requirement_target( building_load=raw_load_elec, sample_weight=customer_metadata[["bldg_id", "weight"]], - revenue_requirement_target=settings.rr_total, + revenue_requirement_target=settings.rr_total + if settings.residual_cost_frac is None + else None, residual_cost=None, - residual_cost_frac=None, + residual_cost_frac=settings.residual_cost_frac, bulk_marginal_costs=bulk_marginal_costs, distribution_marginal_costs=dist_and_sub_tx_marginal_costs, low_income_strategy=None, ) + if revenue_requirement is None: + # residual_cost_frac was set: RR = Total MC (0% residual) + revenue_requirement = float(costs_by_type["Total System Costs ($)"]) + log.info( + "residual_cost_frac=%.4f: revenue_requirement derived from " + "Total System Costs = $%.0f", + settings.residual_cost_frac, + revenue_requirement, + ) effective_load_elec = raw_load_elec elasticity_tracker = pd.DataFrame() if settings.run_includes_subclasses: From 0c1747afce0e1a9b2220f63f9359dd097e3e4f63 Mon Sep 17 00:00:00 2001 From: sherryzuo <145484385+sherryzuo@users.noreply.github.com> Date: Mon, 23 Mar 2026 15:55:05 +0000 Subject: [PATCH 10/19] get rid of approximation for upgrades 4,5 --- data/resstock/Justfile | 2 -- 1 file changed, 2 deletions(-) 
diff --git a/data/resstock/Justfile b/data/resstock/Justfile index deeef363..6071cf11 100644 --- a/data/resstock/Justfile +++ b/data/resstock/Justfile @@ -324,8 +324,6 @@ adjust-mf-electricity-NY-upgrade-05: create-sb-release-for-adoption-upgrades-NY: just copy-resstock-data-2024-amy2018-2-NY "01 04 05" "metadata load_curve_hourly" just approximate-non-hp-load NY 01 res_2024_amy2018_2 res_2024_amy2018_2_sb 15 True True - just approximate-non-hp-load NY 04 res_2024_amy2018_2 res_2024_amy2018_2_sb 15 True True - just approximate-non-hp-load NY 05 res_2024_amy2018_2 res_2024_amy2018_2_sb 15 True True just adjust-mf-electricity-NY-upgrade-01 just adjust-mf-electricity-NY-upgrade-04 just adjust-mf-electricity-NY-upgrade-05 From d8afe214d6d9bffac4e8937c31f2919ce651ae5a Mon Sep 17 00:00:00 2001 From: sherryzuo <145484385+sherryzuo@users.noreply.github.com> Date: Mon, 23 Mar 2026 16:08:22 +0000 Subject: [PATCH 11/19] add comment --- data/resstock/Justfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/data/resstock/Justfile b/data/resstock/Justfile index 6071cf11..2a46ba3b 100644 --- a/data/resstock/Justfile +++ b/data/resstock/Justfile @@ -322,9 +322,9 @@ adjust-mf-electricity-NY-upgrade-05: # Assumes prepare-metadata-ny has already been run (it processes all upgrades 00-05). # metadata_utility (utility assignment) is upgrade-independent and is not re-copied here. 
create-sb-release-for-adoption-upgrades-NY: - just copy-resstock-data-2024-amy2018-2-NY "01 04 05" "metadata load_curve_hourly" - just approximate-non-hp-load NY 01 res_2024_amy2018_2 res_2024_amy2018_2_sb 15 True True - just adjust-mf-electricity-NY-upgrade-01 + just copy-resstock-data-2024-amy2018-2-NY "04 05" "metadata load_curve_hourly" + # just approximate-non-hp-load NY 01 res_2024_amy2018_2 res_2024_amy2018_2_sb 15 True True + # just adjust-mf-electricity-NY-upgrade-01 just adjust-mf-electricity-NY-upgrade-04 just adjust-mf-electricity-NY-upgrade-05 sudo aws s3 sync s3://data.sb/nrel/resstock/res_2024_amy2018_2_sb/ /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/ From a732a171693db6eb984868b1270ff7138adda86d Mon Sep 17 00:00:00 2001 From: sherryzuo <145484385+sherryzuo@users.noreply.github.com> Date: Mon, 23 Mar 2026 16:20:39 +0000 Subject: [PATCH 12/19] Restrict HP adoption assignment to physically applicable buildings For each non-baseline upgrade in the mixed-upgrade materialization, build the candidate pool from buildings where has_hp=True in that upgrade's metadata. This prevents GSHP being assigned to ductless buildings and dual-fuel ASHP being assigned to electrically-heated buildings, both of which carry only baseline loads in their upgrade=N data. Monotonicity is preserved: the per-upgrade pool is shuffled once and adoption expands by taking the first N elements each year. Also update the data/resstock Justfile to reflect that approximation is skipped for upgrades 4 and 5 (only upgrade 01 is approximated). 
--- data/resstock/Justfile | 5 +- rate_design/hp_rates/Justfile | 99 +++++++++++ tests/pre/test_materialize_mixed_upgrade.py | 153 +++++++++++++++-- utils/pre/fit_adoption_config.py | 154 +++++++++++++++--- utils/pre/generate_adoption_scenario_yamls.py | 97 ++++++++++- .../generate_utility_tx_dx_mc.py | 145 ++++++++++++++--- utils/pre/materialize_mixed_upgrade.py | 120 +++++++++++--- 7 files changed, 689 insertions(+), 84 deletions(-) diff --git a/data/resstock/Justfile b/data/resstock/Justfile index 2a46ba3b..c1b5dc4d 100644 --- a/data/resstock/Justfile +++ b/data/resstock/Justfile @@ -321,10 +321,11 @@ adjust-mf-electricity-NY-upgrade-05: # Copy, adjust loads, and sync upgrades 01, 04, 05 into the _sb release for NY. # Assumes prepare-metadata-ny has already been run (it processes all upgrades 00-05). # metadata_utility (utility assignment) is upgrade-independent and is not re-copied here. +# We are NOT running approximate-non-hp-load for upgrades 4 and 5 because they strictly only apply to certain building types. 
create-sb-release-for-adoption-upgrades-NY: just copy-resstock-data-2024-amy2018-2-NY "04 05" "metadata load_curve_hourly" - # just approximate-non-hp-load NY 01 res_2024_amy2018_2 res_2024_amy2018_2_sb 15 True True - # just adjust-mf-electricity-NY-upgrade-01 + just approximate-non-hp-load NY 01 res_2024_amy2018_2 res_2024_amy2018_2_sb 15 True True + just adjust-mf-electricity-NY-upgrade-01 just adjust-mf-electricity-NY-upgrade-04 just adjust-mf-electricity-NY-upgrade-05 sudo aws s3 sync s3://data.sb/nrel/resstock/res_2024_amy2018_2_sb/ /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/ diff --git a/rate_design/hp_rates/Justfile b/rate_design/hp_rates/Justfile index c52c04b7..efcb2156 100644 --- a/rate_design/hp_rates/Justfile +++ b/rate_design/hp_rates/Justfile @@ -85,6 +85,7 @@ path_utility_assignment := path_resstock_release + "/metadata_utility" path_resstock_loads_00 := path_resstock_release + "/load_curve_hourly/state=" + state_upper + "/upgrade=" + upgrade path_mc_table := env_var_or_default('MC_TABLE', path_config / "marginal_costs" / state + "_marginal_costs_" + year + ".csv") path_s3_mc_output := "s3://data.sb/switchbox/marginal_costs/" + state + "/dist_and_sub_tx/" +path_s3_mc_output_cambium := "s3://data.sb/switchbox/marginal_costs/" + state + "/cambium_dist_and_sub_tx/" path_s3_utility_loads := "s3://data.sb/eia/hourly_demand/utilities/" path_electric_utility_stats := "s3://data.sb/eia/861/electric_utility_stats/year=2024/state=" + state_upper + "/data.parquet" path_genability := path_rev_requirement / "top-ups" @@ -337,6 +338,35 @@ create-dist-and-sub-tx-mc-data-all: UTILITY="$util" just create-dist-and-sub-tx-mc-data done +# Distribution marginal costs using Cambium busbar_load as the PoP allocation load shape. +# Writes to the cambium_dist_and_sub_tx/ prefix to avoid overwriting EIA-based dist MCs. 
+# +# Example: +# just s ny create-dist-mc-cambium 2030 +create-dist-mc-cambium year_mc=year: + uv run python {{ path_repo }}/utils/pre/marginal_costs/generate_utility_tx_dx_mc.py \ + --state {{ state_upper }} \ + --utility {{ utility }} \ + --year {{ year_mc }} \ + --load-source cambium \ + --cambium-path "s3://data.sb/nrel/cambium/2024/scenario=MidCase/t={{ year_mc }}/gea=NYISO/r={{ cambium_ba }}/data.parquet" \ + --mc-table-path {{ path_mc_table }} \ + --output-s3-base {{ path_s3_mc_output_cambium }} \ + --n-hours {{ upstream_hours }} \ + --upload + +# Generate Cambium dist MCs for all adoption trajectory years (2025–2050). +# +# Example: +# just s ny create-cambium-dist-mc-all-years +create-cambium-dist-mc-all-years: + #!/usr/bin/env bash + set -euo pipefail + for yr in 2025 2030 2035 2040 2045 2050; do + echo ">> Generating Cambium dist MC for year=${yr}" >&2 + just create-dist-mc-cambium "${yr}" + done + # ============================================================================= # MID-CONFIG: generate between runs (using outputs from earlier runs) # ============================================================================= @@ -773,6 +803,75 @@ print(len(years)) done done +# Generate per-year Cambium scenario YAML entries for adoption runs. +# Uses Cambium busbar_load dist MCs and Cambium supply MCs; 0% residual cost. 
+# Output: config/scenarios/scenarios__adoption_cambium.yaml +# +# Example: +# just s ny generate-adoption-scenarios-cambium nyca_electrification 1,2,5,6 +generate-adoption-scenarios-cambium config_name="default" runs="1,2,5,6": + uv run python {{ path_repo }}/utils/pre/generate_adoption_scenario_yamls.py \ + --base-scenario "{{ path_scenario_config }}" \ + --runs "{{ runs }}" \ + --adoption-config "{{ path_adoption_config_dir }}/{{ config_name }}.yaml" \ + --materialized-dir "{{ path_resstock_release }}/adoption/{{ config_name }}" \ + --output "{{ path_scenarios }}/scenarios_{{ utility }}_adoption_cambium.yaml" \ + --residual-cost-frac 0.0 \ + --cambium-supply \ + --cambium-gea NYISO \ + --cambium-ba {{ cambium_ba }} \ + --cambium-dist-mc-base "s3://data.sb/switchbox/marginal_costs/{{ state }}/cambium_dist_and_sub_tx" + +# Run a single Cambium adoption scenario by (year-indexed) run number. +# Uses scenarios__adoption_cambium.yaml generated by generate-adoption-scenarios-cambium. +# +# Example: +# just s ny run-adoption-scenario-cambium 101 +run-adoption-scenario-cambium run_num: + #!/usr/bin/env bash + set -euo pipefail + : "${RDP_BATCH:?Set RDP_BATCH before running}" + export RDP_BATCH + log_dir="${HOME}/rdp_run_logs" + mkdir -p "${log_dir}" + log_file="${log_dir}/{{ utility }}_adoption_cambium_run{{ run_num }}_${RDP_BATCH}.log" + echo ">> run-adoption-scenario-cambium {{ run_num }}: logging to ${log_file}" >&2 + uv run python {{ path_repo }}/rate_design/hp_rates/run_scenario.py \ + --state "{{ state }}" \ + --scenario-config "{{ path_scenarios }}/scenarios_{{ utility }}_adoption_cambium.yaml" \ + --run-num "{{ run_num }}" \ + --output-dir "{{ path_outputs_base }}/${RDP_BATCH}" \ + 2>&1 | tee "${log_file}" + +# Full Cambium adoption pipeline: materialize → dist MCs → scenarios → run all. 
+# +# Example: +# RDP_BATCH=ny_20260320_adoption just s ny run-adoption-all-cambium nyca_electrification 1,2,5,6 +run-adoption-all-cambium config_name="default" runs="1,2,5,6": + #!/usr/bin/env bash + set -euo pipefail + : "${RDP_BATCH:?Set RDP_BATCH before running}" + export RDP_BATCH + just materialize-adoption "{{ config_name }}" + just create-cambium-dist-mc-all-years + just generate-adoption-scenarios-cambium "{{ config_name }}" "{{ runs }}" + # Determine number of run years from the adoption config. + run_years_count=$(uv run python -c " +import yaml +with open('{{ path_adoption_config_dir }}/{{ config_name }}.yaml') as f: + cfg = yaml.safe_load(f) +years = cfg.get('run_years', cfg.get('year_labels', [])) +print(len(years)) +") + IFS=',' read -ra base_nums <<< "{{ runs }}" + for yi in $(seq 1 "$run_years_count"); do + for base_run in "${base_nums[@]}"; do + key=$((yi * 100 + base_run)) + echo ">> run-adoption-all-cambium: run-${key} (year_index=$((yi - 1)), base_run=${base_run})" >&2 + just run-adoption-scenario-cambium "${key}" + done + done + # ============================================================================= # HELPERS # ============================================================================= diff --git a/tests/pre/test_materialize_mixed_upgrade.py b/tests/pre/test_materialize_mixed_upgrade.py index 1c982ed6..33aa494f 100644 --- a/tests/pre/test_materialize_mixed_upgrade.py +++ b/tests/pre/test_materialize_mixed_upgrade.py @@ -181,7 +181,134 @@ def test_different_seeds_differ(self) -> None: # --------------------------------------------------------------------------- -# 3. Metadata combination — unit tests via main() +# 3. 
Applicability-restricted assignment +# --------------------------------------------------------------------------- + + +class TestAssignBuildingsApplicability: + """assign_buildings() with applicable_bldg_ids_per_upgrade restricts pools.""" + + def test_only_applicable_buildings_assigned(self) -> None: + """No building outside its upgrade's applicable set should be assigned to it.""" + bldg_ids = _bldg_ids(100) + # upgrade 2 applicable to first 60; upgrade 4 applicable to last 40 + applicable = {2: set(range(1, 61)), 4: set(range(61, 101))} + scenario = {2: [0.10, 0.20], 4: [0.05, 0.10]} + assignments = assign_buildings( + bldg_ids, + scenario, + [0, 1], + random_seed=0, + applicable_bldg_ids_per_upgrade=applicable, + ) + for t in [0, 1]: + for bid, uid in assignments[t].items(): + if uid == 2: + assert bid in applicable[2], ( + f"bldg {bid} assigned to upgrade 2 but not applicable" + ) + if uid == 4: + assert bid in applicable[4], ( + f"bldg {bid} assigned to upgrade 4 but not applicable" + ) + + def test_non_overlapping_applicable_sets(self) -> None: + """Buildings in only one applicable set are assigned to that upgrade.""" + bldg_ids = _bldg_ids(100) + # Disjoint sets: upgrade 2 → 1-50, upgrade 4 → 51-100 + applicable = {2: set(range(1, 51)), 4: set(range(51, 101))} + scenario = {2: [0.30], 4: [0.20]} + assignments = assign_buildings( + bldg_ids, + scenario, + [0], + random_seed=0, + applicable_bldg_ids_per_upgrade=applicable, + ) + assigned_to_2 = {bid for bid, u in assignments[0].items() if u == 2} + assigned_to_4 = {bid for bid, u in assignments[0].items() if u == 4} + assert assigned_to_2.issubset(applicable[2]) + assert assigned_to_4.issubset(applicable[4]) + assert assigned_to_2.isdisjoint(assigned_to_4) + + def test_applicable_smaller_than_target_warns_and_caps(self) -> None: + """When applicable pool is smaller than target count, a warning is emitted.""" + bldg_ids = _bldg_ids(100) + # upgrade 5 only applicable to 5 buildings but scenario requests 30% + 
applicable = {5: set(range(1, 6))} + scenario = {5: [0.30]} + with pytest.warns(UserWarning, match="Upgrade 5"): + assignments = assign_buildings( + bldg_ids, + scenario, + [0], + random_seed=0, + applicable_bldg_ids_per_upgrade=applicable, + ) + assigned_to_5 = sum(1 for u in assignments[0].values() if u == 5) + assert assigned_to_5 == 5 # capped at pool size + + def test_overlapping_sets_no_double_assignment(self) -> None: + """When applicable sets overlap, each building gets at most one upgrade.""" + bldg_ids = _bldg_ids(100) + # Both upgrades applicable to all 100 buildings; upgrade 2 (lower ID) gets + # first pick and claims all buildings, leaving upgrade 4 with an empty pool. + applicable = {2: set(range(1, 101)), 4: set(range(1, 101))} + scenario = {2: [0.20], 4: [0.15]} + with pytest.warns(UserWarning, match="Upgrade 4"): + assignments = assign_buildings( + bldg_ids, + scenario, + [0], + random_seed=0, + applicable_bldg_ids_per_upgrade=applicable, + ) + for bid, uid in assignments[0].items(): + assert uid in {0, 2, 4}, f"bldg {bid} assigned unknown upgrade {uid}" + # No building should be in two upgrade pools + assigned_to_2 = {bid for bid, u in assignments[0].items() if u == 2} + assigned_to_4 = {bid for bid, u in assignments[0].items() if u == 4} + assert assigned_to_2.isdisjoint(assigned_to_4) + + def test_monotonicity_preserved_with_applicability(self) -> None: + """Monotonic adoption holds when using applicable_bldg_ids_per_upgrade.""" + bldg_ids = _bldg_ids(N_BLDGS) + applicable = {2: set(range(1, 81))} # 80 buildings applicable for upgrade 2 + scenario = {2: [0.10, 0.20, 0.30]} + assignments = assign_buildings( + bldg_ids, + scenario, + [0, 1, 2], + random_seed=3, + applicable_bldg_ids_per_upgrade=applicable, + ) + adopted_t0 = {bid for bid, u in assignments[0].items() if u == 2} + adopted_t1 = {bid for bid, u in assignments[1].items() if u == 2} + adopted_t2 = {bid for bid, u in assignments[2].items() if u == 2} + assert 
adopted_t0.issubset(adopted_t1) + assert adopted_t1.issubset(adopted_t2) + + def test_none_applicable_fallback_identical_to_unrestricted(self) -> None: + """Passing applicable_bldg_ids_per_upgrade=None gives same result as omitting it.""" + bldg_ids = _bldg_ids(N_BLDGS) + a_restricted = assign_buildings( + bldg_ids, + SCENARIO_2UP, + RUN_YEAR_INDICES, + random_seed=42, + applicable_bldg_ids_per_upgrade=None, + ) + a_unrestricted = assign_buildings( + bldg_ids, + SCENARIO_2UP, + RUN_YEAR_INDICES, + random_seed=42, + ) + assert a_restricted == a_unrestricted + + +# --------------------------------------------------------------------------- +# 4. Metadata combination — unit tests via main() # --------------------------------------------------------------------------- @@ -204,7 +331,9 @@ def fs(self, tmp_path: Path) -> Path: / f"upgrade={uid:02d}" / "metadata-sb.parquet" ) - df = _make_metadata_df(bldg_ids, has_hp if uid == 0 else None) + # upgrade=02: eligible buildings (first 7) have has_hp=True (upgrade applied); + # already-HP buildings (last 3) also keep has_hp=True. + df = _make_metadata_df(bldg_ids, has_hp if uid == 0 else [True] * 10) _write_metadata(meta_path, df) loads_dir = ( release / "load_curve_hourly" / "state=NY" / f"upgrade={uid:02d}" @@ -337,7 +466,7 @@ def test_more_hp_buildings_at_later_year( # --------------------------------------------------------------------------- -# 4. Symlink creation +# 5. 
Symlink creation # --------------------------------------------------------------------------- @@ -358,7 +487,8 @@ def fs_and_out(self, tmp_path: Path) -> tuple[Path, Path]: / f"upgrade={uid:02d}" / "metadata-sb.parquet" ) - _write_metadata(meta, _make_metadata_df(bldg_ids)) + has_hp = [False] * len(bldg_ids) if uid == 0 else [True] * len(bldg_ids) + _write_metadata(meta, _make_metadata_df(bldg_ids, has_hp)) loads_dir = ( release / "load_curve_hourly" / "state=RI" / f"upgrade={uid:02d}" ) @@ -438,7 +568,7 @@ def test_assigned_buildings_link_to_correct_upgrade( # --------------------------------------------------------------------------- -# 5. Scenario CSV output +# 6. Scenario CSV output # --------------------------------------------------------------------------- @@ -458,7 +588,8 @@ def out_dir(self, tmp_path: Path) -> Path: / f"upgrade={uid:02d}" / "metadata-sb.parquet" ) - _write_metadata(meta, _make_metadata_df(bldg_ids)) + has_hp = [False] * len(bldg_ids) if uid == 0 else [True] * len(bldg_ids) + _write_metadata(meta, _make_metadata_df(bldg_ids, has_hp)) ld = release / "load_curve_hourly" / "state=NY" / f"upgrade={uid:02d}" for bid in bldg_ids: _touch_load_file(ld, bid, uid) @@ -530,7 +661,7 @@ def test_csv_later_year_has_more_or_equal_adopters(self, out_dir: Path) -> None: # --------------------------------------------------------------------------- -# 6. Validation error paths +# 7. Validation error paths # --------------------------------------------------------------------------- @@ -633,7 +764,9 @@ def test_missing_load_file_for_building_raises(self, tmp_path: Path) -> None: / f"upgrade={uid:02d}" / "metadata-sb.parquet" ) - _write_metadata(meta, _make_metadata_df(bldg_ids)) + # upgrade=02 metadata must have has_hp=True so buildings are applicable. 
+ has_hp = [False] * len(bldg_ids) if uid == 0 else [True] * len(bldg_ids) + _write_metadata(meta, _make_metadata_df(bldg_ids, has_hp)) ld = release / "load_curve_hourly" / "state=NY" / f"upgrade={uid:02d}" for bid in bldg_ids: _touch_load_file(ld, bid, uid) @@ -677,7 +810,7 @@ def test_missing_load_file_for_building_raises(self, tmp_path: Path) -> None: # --------------------------------------------------------------------------- -# 7. Config parsing: run_years snap + year indices +# 8. Config parsing: run_years snap + year indices # --------------------------------------------------------------------------- @@ -732,7 +865,7 @@ def test_string_keys_normalised_to_int(self) -> None: # --------------------------------------------------------------------------- -# 8. _build_load_file_map +# 9. _build_load_file_map # --------------------------------------------------------------------------- diff --git a/utils/pre/fit_adoption_config.py b/utils/pre/fit_adoption_config.py index 76286742..c312e923 100644 --- a/utils/pre/fit_adoption_config.py +++ b/utils/pre/fit_adoption_config.py @@ -8,8 +8,10 @@ k = growth rate t0 = inflection year -Fractions are normalized by 7,900,000 total NYCA occupied housing units -(Census ACS / NYISO Gold Book 2025 estimate). 2025 is forced to 0.0 — all +Fractions are normalized by the total NY residential electric customer count +derived from EIA-861 (PUDL), summing bundled and delivery-only service types +for the most recent available year (excludes "energy" rows which duplicate +delivery-only customers who switched to an ESCO). 2025 is forced to 0.0 — all buildings remain at upgrade-0 baseline — regardless of the logistic value. Technology → ResStock upgrade mapping: @@ -34,6 +36,7 @@ import numpy as np import polars as pl +from cloudpathlib import S3Path from plotnine import ( aes, element_line, @@ -65,8 +68,74 @@ SCENARIO_NAME = "nyca_electrification" RANDOM_SEED = 42 -# Total NYCA occupied housing units used as the fraction denominator. 
-TOTAL_HU = 7_900_000.0 +# S3 path to EIA-861 electric utility stats (Hive-partitioned by year and state). +_EIA861_S3_BASE = "s3://data.sb/eia/861/electric_utility_stats/" + +# --------------------------------------------------------------------------- +# EIA-861 residential customer count +# --------------------------------------------------------------------------- + + +def load_total_hu(state: str = "NY", max_year: int | None = None) -> tuple[float, int]: + """Return (total_residential_customers, year) from EIA-861 on S3. + + Sums bundled and delivery service types only — excludes "energy" rows + which duplicate delivery-only customers who switched to an ESCO supplier. + Uses the most recent partition year <= ``max_year`` (defaults to the + latest available year). + """ + from utils import get_aws_region + + storage_options = {"aws_region": get_aws_region()} + base = S3Path(_EIA861_S3_BASE) + year_dirs = sorted( + int(p.name.split("=")[1]) + for p in base.iterdir() + if p.name.startswith("year=") + ) + if not year_dirs: + raise FileNotFoundError(f"No EIA-861 year partitions found at {_EIA861_S3_BASE}") + if max_year is not None: + year_dirs = [y for y in year_dirs if y <= max_year] + if not year_dirs: + raise ValueError(f"No EIA-861 year partitions found <= {max_year}") + year = year_dirs[-1] + + path = f"{_EIA861_S3_BASE}year={year}/state={state}/data.parquet" + df = pl.scan_parquet(path, storage_options=storage_options) + + # The pre-aggregated parquet sums all service types (bundled + delivery + energy). + # We need bundled + delivery only, so we fall back to the PUDL source. + # Check whether a service_type column is present; if not, re-derive from PUDL. + schema = df.schema + if "service_type" in schema: + total = ( + df.filter(pl.col("service_type").is_in(["bundled", "delivery"])) + .select(pl.col("residential_customers").sum()) + .collect()["residential_customers"][0] + ) + else: + # Pre-aggregated file has no service_type; it already summed all types. 
+ # Re-derive from PUDL directly to exclude the "energy" double-count. + pudl_version = "v2026.2.0" + pudl_url = ( + f"https://s3.us-west-2.amazonaws.com/pudl.catalyst.coop" + f"/{pudl_version}/core_eia861__yearly_sales.parquet" + ) + total = ( + pl.scan_parquet(pudl_url) + .filter( + (pl.col("state") == state) + & (pl.col("customer_class") == "residential") + & (pl.col("service_type").is_in(["bundled", "delivery"])) + & (pl.col("report_date").dt.year() == year) + ) + .select(pl.col("customers").sum()) + .collect()["customers"][0] + ) + + return float(total), year + # Digitized from the NYISO Gold Book 2025 NYCA stacked-area chart. # Each entry: (calendar_year, individual_technology_housing_units_in_thousands). @@ -113,6 +182,11 @@ 1: "supplemental heat", } +# Plot labels include the ResStock upgrade code so charts are self-documenting. +_UPGRADE_PLOT_LABELS: dict[int, str] = { + uid: f"upgrade {uid} — {label}" for uid, label in _UPGRADE_LABELS.items() +} + # Wong colorblind-friendly palette matched to NYISO chart hues. _UPGRADE_COLORS: dict[int, str] = { 2: "#D55E00", # vermillion / orange @@ -148,18 +222,23 @@ def _fit_logistic(years: np.ndarray, fracs: np.ndarray) -> tuple[float, float, f def fit_all( run_years: list[int], + total_hu: float, ) -> tuple[dict[int, list[float]], dict[int, tuple[float, float, float]]]: """Fit logistic curves; return ``(scenario_fracs, params)``. ``scenario_fracs[upgrade_id][i]`` is the adoption fraction at ``run_years[i]``. 2025 is forced to ``0.0``. + + Args: + run_years: Calendar years to evaluate. + total_hu: Total residential customer count used as the fraction denominator. 
""" scenario: dict[int, list[float]] = {} params: dict[int, tuple[float, float, float]] = {} for upgrade_id, pts in _RAW_DATA.items(): years_arr = np.array([y for y, _ in pts], dtype=float) - fracs_arr = np.array([hu * 1_000 / TOTAL_HU for _, hu in pts]) + fracs_arr = np.array([hu * 1_000 / total_hu for _, hu in pts]) L, k, t0 = _fit_logistic(years_arr, fracs_arr) params[upgrade_id] = (L, k, t0) @@ -194,6 +273,8 @@ def write_yaml( scenario: dict[int, list[float]], params: dict[int, tuple[float, float, float]], run_years: list[int], + total_hu: float, + eia_year: int, ) -> None: """Write adoption config YAML with full methodology commentary.""" param_block = "\n".join( @@ -227,9 +308,10 @@ def write_yaml( "#", "# Methodology: logistic S-curves f(t) = L / (1 + exp(-k * (t - t0))) fit", "# (scipy curve_fit) to housing-unit counts digitized from the NYISO Gold", - f"# Book 2025 NYCA stacked-area chart. Denominator: {TOTAL_HU:,.0f} total NYCA", - "# occupied housing units (Census ACS / NYISO estimate). 2025 forced to 0.0", - "# (all buildings at upgrade-0 baseline).", + f"# Book 2025 NYCA stacked-area chart. Denominator: {total_hu:,.0f} NY residential", + f"# electric customers (EIA-861 {eia_year}, bundled + delivery service types,", + "# excluding ESCO energy-only rows which duplicate delivery customers).", + "# 2025 forced to 0.0 (all buildings at upgrade-0 baseline).", "#", "# Fitted parameters:", param_block, @@ -262,6 +344,8 @@ def make_plot( params: dict[int, tuple[float, float, float]], run_years: list[int], path_plot: Path, + total_hu: float, + eia_year: int, ) -> None: """Save a plotnine figure: continuous logistic curves + digitized points.""" # Build long-format DataFrame for fitted curves. 
@@ -274,7 +358,7 @@ def make_plot( curve_rows.append( { "year": float(yr), - "technology": _UPGRADE_LABELS[uid], + "technology": _UPGRADE_PLOT_LABELS[uid], "pct": max(pct, 0.0), } ) @@ -288,16 +372,16 @@ def make_plot( point_rows.append( { "year": float(yr), - "technology": _UPGRADE_LABELS[uid], - "pct": hu_k * 1_000 / TOTAL_HU * 100.0, + "technology": _UPGRADE_PLOT_LABELS[uid], + "pct": hu_k * 1_000 / total_hu * 100.0, } ) points_df = pl.DataFrame(point_rows) # Ordered technology names for the legend (matches NYISO chart order, bottom→top). - tech_order = [_UPGRADE_LABELS[uid] for uid in [2, 4, 5, 1]] - color_map = {_UPGRADE_LABELS[uid]: _UPGRADE_COLORS[uid] for uid in [2, 4, 5, 1]} + tech_order = [_UPGRADE_PLOT_LABELS[uid] for uid in [2, 4, 5, 1]] + color_map = {_UPGRADE_PLOT_LABELS[uid]: _UPGRADE_COLORS[uid] for uid in [2, 4, 5, 1]} # Convert to pandas for plotnine; use pandas Categorical for legend order. import pandas as pd # noqa: PLC0415 @@ -348,13 +432,18 @@ def make_plot( ) + labs( title="NYCA HP adoption trajectory — NYISO Gold Book 2025 logistic fit", + subtitle=( + f"Denominator: {total_hu:,.0f} NY residential electric customers" + f" (EIA-861 {eia_year}, bundled + delivery)" + ), x="Year", - y="Share of NYCA housing units", - color="Technology", + y="Share of NY residential electric customers", + color="Technology (ResStock upgrade)", ) + theme_minimal() + theme( plot_title=element_text(size=11), + plot_subtitle=element_text(size=9), axis_title=element_text(size=10), legend_title=element_text(size=9), legend_text=element_text(size=9), @@ -371,13 +460,15 @@ def make_stacked_plot( params: dict[int, tuple[float, float, float]], run_years: list[int], path_plot: Path, + total_hu: float, + eia_year: int, ) -> None: """Save a stacked area chart matching the NYISO Gold Book visual style.""" import pandas as pd # noqa: PLC0415 # Stacking order bottom→top mirrors the NYISO chart. 
-    stack_order = [_UPGRADE_LABELS[uid] for uid in [2, 4, 5, 1]]
-    fill_map = {_UPGRADE_LABELS[uid]: _UPGRADE_COLORS[uid] for uid in [2, 4, 5, 1]}
+    stack_order = [_UPGRADE_PLOT_LABELS[uid] for uid in [2, 4, 5, 1]]
+    fill_map = {_UPGRADE_PLOT_LABELS[uid]: _UPGRADE_COLORS[uid] for uid in [2, 4, 5, 1]}
 
     curve_rows: list[dict] = []
     for uid, (L, k, t0) in params.items():
@@ -386,7 +477,7 @@ def make_stacked_plot(
             curve_rows.append(
                 {
                     "year": float(yr),
-                    "technology": _UPGRADE_LABELS[uid],
+                    "technology": _UPGRADE_PLOT_LABELS[uid],
                     "pct": max(float(frac) * 100.0, 0.0),
                 }
             )
@@ -422,13 +513,18 @@ def make_stacked_plot(
         )
         + labs(
             title="NYCA HP adoption trajectory — NYISO Gold Book 2025 logistic fit (stacked)",
+            subtitle=(
+                f"Denominator: {total_hu:,.0f} NY residential electric customers"
+                f" (EIA-861 {eia_year}, bundled + delivery)"
+            ),
             x="Year",
-            y="Share of NYCA housing units",
-            fill="Technology",
+            y="Share of NY residential electric customers",
+            fill="Technology (ResStock upgrade)",
         )
         + theme_minimal()
         + theme(
             plot_title=element_text(size=11),
+            plot_subtitle=element_text(size=9),
             axis_title=element_text(size=10),
             legend_title=element_text(size=9),
             legend_text=element_text(size=9),
@@ -487,7 +583,15 @@ def main() -> None:
     args = build_parser().parse_args()
     run_years = [int(y.strip()) for y in args.run_years.split(",")]
 
-    scenario, params = fit_all(run_years)
+    log.info("loading NY residential customer count from EIA-861 (S3)…")
+    total_hu, eia_year = load_total_hu(state="NY")
+    log.info(
+        "EIA-861 %d: %s NY residential electric customers (bundled + delivery)",
+        eia_year,
+        f"{total_hu:,.0f}",
+    )
+
+    scenario, params = fit_all(run_years, total_hu)
 
     validate_scenario({uid: scenario[uid] for uid in scenario})
 
@@ -496,13 +600,15 @@ def main() -> None:
         total = sum(scenario[uid][i] for uid in scenario)
         log.info("year %d: total fraction = %.4f", yr, total)
 
-    write_yaml(Path(args.path_output), scenario, params, run_years)
+    write_yaml(Path(args.path_output), scenario, 
params, run_years, total_hu, eia_year) if args.path_plot: - make_plot(params, run_years, Path(args.path_plot)) + make_plot(params, run_years, Path(args.path_plot), total_hu, eia_year) if args.path_stacked_plot: - make_stacked_plot(params, run_years, Path(args.path_stacked_plot)) + make_stacked_plot( + params, run_years, Path(args.path_stacked_plot), total_hu, eia_year + ) if __name__ == "__main__": diff --git a/utils/pre/generate_adoption_scenario_yamls.py b/utils/pre/generate_adoption_scenario_yamls.py index d64813d7..86ad97dc 100644 --- a/utils/pre/generate_adoption_scenario_yamls.py +++ b/utils/pre/generate_adoption_scenario_yamls.py @@ -81,6 +81,52 @@ def build_parser() -> argparse.ArgumentParser: dest="path_output", help="Path to write the generated adoption scenario YAML.", ) + p.add_argument( + "--residual-cost-frac", + type=float, + default=None, + dest="residual_cost_frac", + help=( + "When set, adds residual_cost_frac to every generated run entry and " + "sets utility_revenue_requirement: none. Use 0.0 for a 0%% residual " + "(revenue requirement = total marginal costs only)." + ), + ) + p.add_argument( + "--cambium-supply", + action="store_true", + dest="cambium_supply", + help=( + "When set, rewrites supply MC paths to Cambium for runs with " + "run_includes_supply: true, and clears path_bulk_tx_mc for all runs " + "(bulk TX is already embedded in Cambium enduse costs)." + ), + ) + p.add_argument( + "--cambium-gea", + type=str, + default="NYISO", + dest="cambium_gea", + help="Cambium grid emission area (GEA) code, e.g. NYISO (default: NYISO).", + ) + p.add_argument( + "--cambium-ba", + type=str, + default=None, + dest="cambium_ba", + help="Cambium balancing area code, e.g. p127. Required when --cambium-supply is set.", + ) + p.add_argument( + "--cambium-dist-mc-base", + type=str, + default=None, + dest="cambium_dist_mc_base", + help=( + "S3 base path for Cambium-based dist MCs. 
When set, " + "path_dist_and_sub_tx_mc is replaced with " + "{base}/utility={utility}/year={calendar_year}/data.parquet." + ), + ) return p @@ -128,9 +174,15 @@ def _resolve_run_years(config: dict[str, Any]) -> list[tuple[int, int]]: def _replace_year_in_value(value: Any, old_year: int, new_year: int) -> Any: - """Recursively replace ``year={old_year}`` with ``year={new_year}`` in strings.""" + """Recursively replace year tokens in strings. + + Handles both ``year={old_year}`` (Hive partition keys) and + ``t={old_year}`` (Cambium path segment) patterns. + """ if isinstance(value, str): - return value.replace(f"year={old_year}", f"year={new_year}") + value = value.replace(f"year={old_year}", f"year={new_year}") + value = value.replace(f"t={old_year}", f"t={new_year}") + return value if isinstance(value, dict): return { k: _replace_year_in_value(v, old_year, new_year) for k, v in value.items() @@ -186,6 +238,10 @@ def main(argv: list[str] | None = None) -> None: path_materialized_dir = Path(args.path_materialized_dir) path_output = Path(args.path_output) + # Validate Cambium arg combinations. + if args.cambium_supply and not args.cambium_ba: + raise ValueError("--cambium-ba is required when --cambium-supply is set.") + # Parse run numbers. try: run_nums = [int(r.strip()) for r in args.runs.split(",") if r.strip()] @@ -220,6 +276,14 @@ def main(argv: list[str] | None = None) -> None: f"{len(year_run_pairs)} year(s) × {len(run_nums)} run(s) = " f"{len(year_run_pairs) * len(run_nums)} entries" ) + if args.residual_cost_frac is not None: + print( + f" residual_cost_frac={args.residual_cost_frac} (utility_revenue_requirement: none)" + ) + if args.cambium_supply: + print(f" Cambium supply MCs: gea={args.cambium_gea}, ba={args.cambium_ba}") + if args.cambium_dist_mc_base: + print(f" Cambium dist MC base: {args.cambium_dist_mc_base}") # 3. Build generated run entries. 
output_runs: dict[int, dict[str, Any]] = {} @@ -244,8 +308,8 @@ def main(argv: list[str] | None = None) -> None: # Update year_run to the calendar year for this adoption cohort. run_entry["year_run"] = calendar_year - # Replace year= tokens in all string path values so MC data resolves - # to the correct Cambium year. + # Replace year= and t= tokens in all string path values so MC data + # resolves to the correct year. run_entry = _replace_year_in_value(run_entry, old_year_run, calendar_year) # Update run_name to include year and mixed tag. @@ -254,6 +318,31 @@ def main(argv: list[str] | None = None) -> None: calendar_year, ) + # Apply Cambium-specific path overrides. + if args.cambium_supply: + cambium_path = ( + f"s3://data.sb/nrel/cambium/2024/scenario=MidCase" + f"/t={calendar_year}/gea={args.cambium_gea}/r={args.cambium_ba}/data.parquet" + ) + run_includes_supply = bool(run_entry.get("run_includes_supply", False)) + if run_includes_supply: + run_entry["path_supply_energy_mc"] = cambium_path + run_entry["path_supply_capacity_mc"] = cambium_path + # Clear bulk TX for all runs: Cambium enduse costs already include it. + run_entry["path_bulk_tx_mc"] = "" + + if args.cambium_dist_mc_base: + utility_val = str(run_entry.get("utility", "")) + base = args.cambium_dist_mc_base.rstrip("/") + run_entry["path_dist_and_sub_tx_mc"] = ( + f"{base}/utility={utility_val}/year={calendar_year}/data.parquet" + ) + + # Apply residual cost fraction override. 
+ if args.residual_cost_frac is not None: + run_entry["residual_cost_frac"] = args.residual_cost_frac + run_entry["utility_revenue_requirement"] = None + output_key = (year_index + 1) * 100 + run_num output_runs[output_key] = run_entry print( diff --git a/utils/pre/marginal_costs/generate_utility_tx_dx_mc.py b/utils/pre/marginal_costs/generate_utility_tx_dx_mc.py index 4f35fb73..ccdd6813 100644 --- a/utils/pre/marginal_costs/generate_utility_tx_dx_mc.py +++ b/utils/pre/marginal_costs/generate_utility_tx_dx_mc.py @@ -5,7 +5,8 @@ price signals. Input: - - Utility hourly load profile: s3://data.sb/eia/hourly_demand/utilities/region=/utility=X/year=YYYY/month=M/data.parquet + - Utility hourly load profile (EIA): s3://data.sb/eia/hourly_demand/utilities/region=/utility=X/year=YYYY/month=M/data.parquet + - OR Cambium busbar_load (--load-source cambium): s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=YYYY/gea=GEA/r=BA/data.parquet - Marginal cost table CSV with columns: utility, sub_tx_and_dist_mc_kw_yr - Load year (determines which load profile year to use) @@ -15,18 +16,26 @@ - Partition path: utility=X/year=YYYY/data.parquet Usage: - # Inspect results (no upload) - uses 2025 loads + # EIA load source (default) - Inspect results (no upload) - uses 2025 loads python generate_utility_tx_dx_mc.py --state RI --utility rie --load-year 2025 \ --mc-table-path rate_design/hp_rates/ri/config/marginal_costs/ri_marginal_costs_2025.csv \ --utility-load-s3-base s3://data.sb/eia/hourly_demand/utilities/ \ --output-s3-base s3://data.sb/switchbox/marginal_costs/ri/dist_and_sub_tx/ - # Upload to S3 + # EIA load source - Upload to S3 python generate_utility_tx_dx_mc.py --state NY --utility nyseg --load-year 2024 \ --mc-table-path rate_design/hp_rates/ny/config/marginal_costs/ny_sub_tx_and_dist_mc_levelized.csv \ --utility-load-s3-base s3://data.sb/eia/hourly_demand/utilities/ \ --output-s3-base s3://data.sb/switchbox/marginal_costs/ny/dist_and_sub_tx/ \ --upload + + # Cambium 
busbar_load source - Upload to S3 + python generate_utility_tx_dx_mc.py --state NY --utility nyseg --year 2030 \ + --load-source cambium \ + --cambium-path s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2030/gea=NYISO/r=p127/data.parquet \ + --mc-table-path rate_design/hp_rates/ny/config/marginal_costs/ny_sub_tx_and_dist_mc_levelized.csv \ + --output-s3-base s3://data.sb/switchbox/marginal_costs/ny/cambium_dist_and_sub_tx/ \ + --upload """ import argparse @@ -94,6 +103,57 @@ def load_utility_load_profile( return df +def load_cambium_load_profile( + cambium_path: str, + utility: str, + storage_options: dict[str, str], +) -> pl.DataFrame: + """Load Cambium busbar_load as a utility load profile for PoP allocation. + + Reads the ``busbar_load`` column (MWh busbar-equivalent) from a Cambium + parquet file and returns it in the same format expected by + :func:`normalize_load_to_cairo_8760`. + + Distribution infrastructure capacity is sized to busbar-level peaks (before + distribution losses), so ``busbar_load`` is the appropriate PoP allocation + load shape — analogous to the EIA utility ``load_mw`` column. + + Args: + cambium_path: S3 or local path to a Cambium data.parquet file with + columns ``timestamp`` and ``busbar_load``. + utility: Utility short code (e.g. ``nyseg``) — written into the + ``utility`` column of the returned DataFrame. + storage_options: Polars S3 storage options with AWS bucket region. + + Returns: + DataFrame with columns: timestamp, utility, load_mw (8760 rows expected + after normalization; ``busbar_load`` is renamed to ``load_mw``). + """ + print(f"Loading Cambium busbar_load from: {cambium_path}") + if cambium_path.startswith("s3://"): + df = pl.read_parquet(cambium_path, storage_options=storage_options) + else: + df = pl.read_parquet(cambium_path) + + required = {"timestamp", "busbar_load"} + missing = required - set(df.columns) + if missing: + raise ValueError( + f"Cambium parquet is missing expected columns {missing}. 
" + f"Available: {df.columns}" + ) + + df = df.select( + [ + pl.col("timestamp"), + pl.col("busbar_load").alias("load_mw"), + ] + ).with_columns(pl.lit(utility).alias("utility")) + + print(f"Loaded {len(df):,} Cambium rows (busbar_load → load_mw) for {utility}") + return df + + def normalize_load_to_cairo_8760( load_df: pl.DataFrame, utility: str, year_load: int ) -> pl.DataFrame: @@ -479,15 +539,38 @@ def main(): required=True, help="Path to marginal cost table CSV (local or s3://)", ) + parser.add_argument( + "--load-source", + type=str, + choices=["eia", "cambium"], + default="eia", + help=( + "Load profile source for PoP allocation. " + "'eia' uses EIA/NYISO utility hourly loads (default). " + "'cambium' uses Cambium busbar_load -- requires --cambium-path." + ), + ) + parser.add_argument( + "--cambium-path", + type=str, + default=None, + dest="cambium_path", + help=( + "Path to Cambium data.parquet (S3 or local). Required when " + "--load-source cambium. " + "E.g. s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2030/gea=NYISO/r=p127/data.parquet" + ), + ) parser.add_argument( "--utility-load-s3-base", "--nyiso-s3-base", dest="utility_load_s3_base", type=str, - required=True, + default=None, help=( "Base S3 path for utility loads " - "(e.g. s3://data.sb/eia/hourly_demand/utilities/)" + "(e.g. s3://data.sb/eia/hourly_demand/utilities/). " + "Required when --load-source eia (the default)." ), ) parser.add_argument( @@ -511,6 +594,15 @@ def main(): args = parser.parse_args() validate_mc_table_path(args.mc_table_path) + + # Validate load-source-specific required args. + if args.load_source == "eia" and not args.utility_load_s3_base: + parser.error( + "--utility-load-s3-base is required when --load-source eia (the default)." 
+ ) + if args.load_source == "cambium" and not args.cambium_path: + parser.error("--cambium-path is required when --load-source cambium.") + load_dotenv() config = get_state_config(args.state) storage_options = get_aws_storage_options() @@ -518,17 +610,20 @@ def main(): output_year = args.year load_year = args.load_year if args.load_year else output_year - # Detect whether load path uses EIA layout (with region partition) or - # NYISO layout (no region partition) based on path prefix. - s3_base = args.utility_load_s3_base - iso_region: str | None = config.iso_region - if "nyiso/hourly_demand" in s3_base: - iso_region = None - print("=" * 60) print("MARGINAL COST ALLOCATION") print(f"State: {config.state}") - print(f"ISO region partition: {iso_region or '(none — NYISO native path)'}") + print(f"Load source: {args.load_source}") + if args.load_source == "cambium": + print(f"Cambium path: {args.cambium_path}") + else: + # Detect whether load path uses EIA layout (with region partition) or + # NYISO layout (no region partition) based on path prefix. 
+ s3_base = args.utility_load_s3_base + iso_region: str | None = config.iso_region + if "nyiso/hourly_demand" in s3_base: + iso_region = None + print(f"ISO region partition: {iso_region or '(none — NYISO native path)'}") print(f"AWS bucket region: {storage_options.get('region')}") print("=" * 60) print(f"Utility: {args.utility}") @@ -538,14 +633,22 @@ def main(): print(f"Upload to S3: {'Yes' if args.upload else 'No (inspection only)'}") print("=" * 60) - load_df = load_utility_load_profile( - s3_base, - iso_region, - load_year, - args.utility, - storage_options, - ) - load_df = normalize_load_to_cairo_8760(load_df, args.utility, load_year) + if args.load_source == "cambium": + load_df = load_cambium_load_profile( + args.cambium_path, + args.utility, + storage_options, + ) + load_df = normalize_load_to_cairo_8760(load_df, args.utility, load_year) + else: + load_df = load_utility_load_profile( + s3_base, + iso_region, + load_year, + args.utility, + storage_options, + ) + load_df = normalize_load_to_cairo_8760(load_df, args.utility, load_year) if load_year != output_year: print(f"\n Remapping load timestamps: {load_year} → {output_year}") diff --git a/utils/pre/materialize_mixed_upgrade.py b/utils/pre/materialize_mixed_upgrade.py index 479191c1..1f52e827 100644 --- a/utils/pre/materialize_mixed_upgrade.py +++ b/utils/pre/materialize_mixed_upgrade.py @@ -208,6 +208,7 @@ def assign_buildings( scenario: dict[int, list[float]], run_year_indices: list[int], random_seed: int, + applicable_bldg_ids_per_upgrade: dict[int, set[int]] | None = None, ) -> dict[int, dict[int, int]]: """Assign buildings to upgrades per run-year index. @@ -219,42 +220,86 @@ def assign_buildings( eligible_bldg_ids: Building IDs eligible for HP adoption (i.e. those whose ``postprocess_group.has_hp`` is not True in upgrade-0 metadata). scenario: Dict mapping upgrade_id → per-year cumulative adoption fractions. 
- Fractions are relative to the *total* building population, so the - caller is responsible for passing a proportionally correct subset. + Fractions are relative to the *total* building population (all upgrades + combined), so the caller is responsible for passing a proportionally + correct subset. run_year_indices: Indices into the scenario lists to materialise. random_seed: Seed for reproducible shuffling. + applicable_bldg_ids_per_upgrade: Optional per-upgrade sets of building IDs + that are actually applicable for each upgrade (i.e. buildings where + ``postprocess_group.has_hp`` is True in that upgrade's metadata). + When provided, each upgrade draws only from its applicable pool rather + than the full eligible pool, preventing non-applicable buildings (which + carry baseline loads regardless of upgrade assignment) from being counted + as HP adopters. If two upgrades share applicable buildings, earlier + upgrades in sorted order take priority. If ``None``, all eligible + buildings are candidates for every upgrade (original behaviour). Returns: ``{year_index: {bldg_id: upgrade_id}}`` — upgrade 0 means "baseline". Only covers ``eligible_bldg_ids``; already-HP buildings are not included. """ - n_bldgs = len(eligible_bldg_ids) - if n_bldgs == 0: + n_total = len(eligible_bldg_ids) + if n_total == 0: return {t: {} for t in run_year_indices} rng = np.random.default_rng(random_seed) - bldg_array = np.array(sorted(eligible_bldg_ids), dtype=np.int64) - rng.shuffle(bldg_array) - upgrades_sorted = sorted(scenario.keys()) num_years = len(next(iter(scenario.values()))) last_t = num_years - 1 - # Pre-allocate contiguous slot ranges using the last year's fractions - # (max fractions since they are non-decreasing). Slots don't overlap, - # and since total adoption <= 1.0 the ranges all fit within [0, N). + if applicable_bldg_ids_per_upgrade is not None: + # Build per-upgrade pools restricted to applicable buildings. 
+ # Each eligible building goes to the first upgrade (by sorted ID) for + # which it is applicable, so pools are non-overlapping. + eligible_set = set(eligible_bldg_ids) + claimed: set[int] = set() + per_upgrade_pools: dict[int, np.ndarray] = {} + for u in upgrades_sorted: + applicable = applicable_bldg_ids_per_upgrade.get(u, set()) + pool = sorted(applicable & eligible_set - claimed) + arr = np.array(pool, dtype=np.int64) + rng.shuffle(arr) + per_upgrade_pools[u] = arr + claimed.update(pool) + + result: dict[int, dict[int, int]] = {} + for t in run_year_indices: + assignments: dict[int, int] = {bid: 0 for bid in eligible_bldg_ids} + for u in upgrades_sorted: + pool = per_upgrade_pools[u] + # Fractions are of total eligible population, not just the pool. + count_t = int(n_total * scenario[u][t]) + actual_count = min(count_t, len(pool)) + if actual_count < count_t: + warnings.warn( + f"Upgrade {u}: target {count_t} buildings " + f"but only {len(pool)} are applicable; " + f"capping at {actual_count}. " + "Consider reducing the adoption fraction for this upgrade.", + stacklevel=2, + ) + for i in range(actual_count): + assignments[int(pool[i])] = u + result[t] = assignments + return result + + # Original behaviour: one shuffled array, contiguous non-overlapping bands. 
+ bldg_array = np.array(sorted(eligible_bldg_ids), dtype=np.int64) + rng.shuffle(bldg_array) + upgrade_offsets: dict[int, int] = {} cumulative_offset = 0 for u in upgrades_sorted: upgrade_offsets[u] = cumulative_offset - max_count = int(n_bldgs * scenario[u][last_t]) + max_count = int(n_total * scenario[u][last_t]) cumulative_offset += max_count - result: dict[int, dict[int, int]] = {} + result = {} for t in run_year_indices: - assignments: dict[int, int] = {int(bid): 0 for bid in bldg_array} + assignments = {int(bid): 0 for bid in bldg_array} for u in upgrades_sorted: - count_t = int(n_bldgs * scenario[u][t]) + count_t = int(n_total * scenario[u][t]) offset = upgrade_offsets[u] for i in range(count_t): assignments[int(bldg_array[offset + i])] = u @@ -351,9 +396,45 @@ def main(argv: list[str] | None = None) -> None: f"{len(already_hp_bldg_ids)} already have HP → kept at upgrade 0)" ) - # 4. Assign only eligible buildings to upgrades per run year. + # 4. Load all upgrade metadata DataFrames now so applicability can be computed. + upgrade_dfs: dict[int, pl.DataFrame] = {0: baseline_df} + for uid in non_baseline_upgrades: + upgrade_dfs[uid] = pl.read_parquet( + _metadata_path(path_resstock_release, state_upper, uid) + ) + + # 5. Assign only eligible buildings to upgrades per run year. + # For each non-baseline upgrade, restrict the pool to buildings that actually + # received the upgrade in ResStock (postprocess_group.has_hp=True in that + # upgrade's metadata). This prevents assigning e.g. GSHP to ductless buildings + # or dual-fuel ASHP to electrically-heated buildings — those buildings have + # baseline loads in the upgrade data regardless of which upgrade they're placed in. 
+ applicable_bldg_ids_per_upgrade: dict[int, set[int]] | None = None + if all(has_hp_col in upgrade_dfs[uid].columns for uid in non_baseline_upgrades): + applicable_bldg_ids_per_upgrade = {} + eligible_set = set(eligible_bldg_ids) + for uid in non_baseline_upgrades: + applicable = ( + set(upgrade_dfs[uid].filter(pl.col(has_hp_col))["bldg_id"].to_list()) + & eligible_set + ) + applicable_bldg_ids_per_upgrade[uid] = applicable + print( + f" upgrade {uid}: {len(applicable)} applicable eligible buildings " + f"(has_hp=True in upgrade metadata, not already HP in baseline)" + ) + else: + print( + " Warning: postprocess_group.has_hp missing from one or more upgrade " + "metadata files; falling back to unrestricted pool for all upgrades." + ) + eligible_assignments_by_year = assign_buildings( - eligible_bldg_ids, scenario, run_year_indices, random_seed + eligible_bldg_ids, + scenario, + run_year_indices, + random_seed, + applicable_bldg_ids_per_upgrade=applicable_bldg_ids_per_upgrade, ) # Merge already-HP buildings back in (pinned to upgrade 0 in all years). @@ -363,13 +444,6 @@ def main(argv: list[str] | None = None) -> None: for t in run_year_indices } - # 5. Load all upgrade metadata DataFrames (indexed by bldg_id for fast lookup). 
- upgrade_dfs: dict[int, pl.DataFrame] = {0: baseline_df} - for uid in non_baseline_upgrades: - upgrade_dfs[uid] = pl.read_parquet( - _metadata_path(path_resstock_release, state_upper, uid) - ) - path_output_dir.mkdir(parents=True, exist_ok=True) all_year_data: list[tuple[int, dict[int, int]]] = [] From 41994b7d2bcf78a659547f549c55bb4aefb6d897 Mon Sep 17 00:00:00 2001 From: sherryzuo <145484385+sherryzuo@users.noreply.github.com> Date: Mon, 23 Mar 2026 17:15:47 +0000 Subject: [PATCH 13/19] Add tests for generate_adoption_scenario_yamls Tests cover: - _replace_year_in_value: both year= (Hive) and t= (Cambium) token replacement - --residual-cost-frac: sets field and clears utility_revenue_requirement - --cambium-supply: rewrites supply MC paths for supply runs, clears bulk TX - --cambium-dist-mc-base: constructs Cambium dist MC path per utility/year - End-to-end t= token replacement across adoption years --- .../test_generate_adoption_scenario_yamls.py | 435 ++++++++++++++++++ 1 file changed, 435 insertions(+) create mode 100644 tests/pre/test_generate_adoption_scenario_yamls.py diff --git a/tests/pre/test_generate_adoption_scenario_yamls.py b/tests/pre/test_generate_adoption_scenario_yamls.py new file mode 100644 index 00000000..88c249dd --- /dev/null +++ b/tests/pre/test_generate_adoption_scenario_yamls.py @@ -0,0 +1,435 @@ +"""Tests for utils/pre/generate_adoption_scenario_yamls.py.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +import pytest +import yaml + +from utils.pre.generate_adoption_scenario_yamls import ( + _replace_year_in_value, + _update_run_name, + main, +) + +ADOPTION_CONFIG: dict[str, Any] = { + "scenario_name": "test_scenario", + "year_labels": [2025, 2030], + "run_years": [2025, 2030], + "upgrades": {2: {"label": "hp", "fractions": [0.1, 0.2]}}, +} + +BASE_RUNS: dict[str, Any] = { + "runs": { + 1: { + "run_name": "ny_nyseg_run1_up00_precalc__flat", + "state": "NY", + "utility": "nyseg", + 
"run_type": "precalc", + "upgrade": "0", + "path_resstock_metadata": "/old/metadata-sb.parquet", + "path_resstock_loads": "/old/loads/", + "path_dist_and_sub_tx_mc": "s3://data.sb/switchbox/marginal_costs/ny/dist_and_sub_tx/utility=nyseg/year=2025/data.parquet", + "path_supply_energy_mc": "s3://data.sb/switchbox/marginal_costs/ny/supply/energy/utility=nyseg/year=2025/zero.parquet", + "path_supply_capacity_mc": "s3://data.sb/switchbox/marginal_costs/ny/supply/capacity/utility=nyseg/year=2025/zero.parquet", + "path_bulk_tx_mc": "s3://data.sb/switchbox/marginal_costs/ny/bulk_tx/utility=nyseg/year=2025/data.parquet", + "utility_revenue_requirement": "rev_requirement/nyseg.yaml", + "run_includes_supply": False, + "year_run": 2025, + }, + 2: { + "run_name": "ny_nyseg_run2_up00_precalc_supply__flat", + "state": "NY", + "utility": "nyseg", + "run_type": "precalc", + "upgrade": "0", + "path_resstock_metadata": "/old/metadata-sb.parquet", + "path_resstock_loads": "/old/loads/", + "path_dist_and_sub_tx_mc": "s3://data.sb/switchbox/marginal_costs/ny/dist_and_sub_tx/utility=nyseg/year=2025/data.parquet", + "path_supply_energy_mc": "s3://data.sb/switchbox/marginal_costs/ny/supply/energy/utility=nyseg/year=2025/data.parquet", + "path_supply_capacity_mc": "s3://data.sb/switchbox/marginal_costs/ny/supply/capacity/utility=nyseg/year=2025/data.parquet", + "path_bulk_tx_mc": "s3://data.sb/switchbox/marginal_costs/ny/bulk_tx/utility=nyseg/year=2025/data.parquet", + "utility_revenue_requirement": "rev_requirement/nyseg.yaml", + "run_includes_supply": True, + "year_run": 2025, + }, + } +} + +BASE_RUNS_WITH_CAMBIUM_T: dict[str, Any] = { + "runs": { + 1: { + "run_name": "ny_nyseg_run1_y2025_mixed__flat", + "state": "NY", + "utility": "nyseg", + "run_type": "precalc", + "upgrade": "0", + "path_resstock_metadata": "/old/metadata-sb.parquet", + "path_resstock_loads": "/old/loads/", + "path_dist_and_sub_tx_mc": 
"s3://data.sb/switchbox/marginal_costs/ny/dist_and_sub_tx/utility=nyseg/year=2025/data.parquet", + "path_supply_energy_mc": "s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2025/gea=NYISO/r=p127/data.parquet", + "path_supply_capacity_mc": "s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2025/gea=NYISO/r=p127/data.parquet", + "path_bulk_tx_mc": "", + "utility_revenue_requirement": None, + "run_includes_supply": True, + "year_run": 2025, + "residual_cost_frac": 0.0, + }, + } +} + + +def _write_yaml(path: Path, data: dict[str, Any]) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(yaml.dump(data, default_flow_style=False)) + + +def _make_test_inputs( + tmp_path: Path, + base_runs: dict[str, Any] | None = None, + adoption_config: dict[str, Any] | None = None, +) -> tuple[Path, Path, Path, Path]: + if base_runs is None: + base_runs = BASE_RUNS + if adoption_config is None: + adoption_config = ADOPTION_CONFIG + path_base = tmp_path / "scenarios_nyseg.yaml" + _write_yaml(path_base, base_runs) + path_adopt = tmp_path / "adoption.yaml" + _write_yaml(path_adopt, adoption_config) + path_mat = tmp_path / "materialized" + for yr in [2025, 2030]: + (path_mat / f"year={yr}").mkdir(parents=True) + path_out = tmp_path / "scenarios_nyseg_adoption.yaml" + return path_base, path_adopt, path_mat, path_out + + +class TestReplaceYearInValue: + """_replace_year_in_value handles both year= (Hive) and t= (Cambium) tokens.""" + + def test_replaces_year_token(self) -> None: + assert ( + _replace_year_in_value("path/year=2025/data.parquet", 2025, 2030) + == "path/year=2030/data.parquet" + ) + + def test_replaces_t_token(self) -> None: + result = _replace_year_in_value( + "s3://cambium/t=2025/gea=NYISO/data.parquet", 2025, 2030 + ) + assert "t=2030" in result + assert "t=2025" not in result + + def test_replaces_both_tokens(self) -> None: + assert ( + _replace_year_in_value("year=2025/t=2025/x", 2025, 2030) + == "year=2030/t=2030/x" + ) + + def 
test_no_replacement_when_year_absent(self) -> None: + assert ( + _replace_year_in_value("year=2024/data.parquet", 2025, 2030) + == "year=2024/data.parquet" + ) + + def test_t_token_not_replaced_when_year_mismatch(self) -> None: + assert ( + _replace_year_in_value("t=2024/data.parquet", 2025, 2030) + == "t=2024/data.parquet" + ) + + def test_replaces_in_dict(self) -> None: + d = {"a": "year=2025/a.parquet", "b": "t=2025/b.parquet"} + result = _replace_year_in_value(d, 2025, 2030) + assert result == {"a": "year=2030/a.parquet", "b": "t=2030/b.parquet"} + + def test_replaces_in_nested_dict(self) -> None: + result = _replace_year_in_value( + {"inner": {"path": "year=2025/x.parquet"}}, 2025, 2030 + ) + assert result["inner"]["path"] == "year=2030/x.parquet" + + def test_replaces_in_list(self) -> None: + assert _replace_year_in_value( + ["year=2025/a.parquet", "t=2025/b.parquet"], 2025, 2030 + ) == ["year=2030/a.parquet", "t=2030/b.parquet"] + + def test_non_string_unchanged(self) -> None: + assert _replace_year_in_value(42, 2025, 2030) == 42 + assert _replace_year_in_value(None, 2025, 2030) is None + + +class TestUpdateRunName: + def test_no_double_underscore_suffix(self) -> None: + assert _update_run_name("ny_nyseg_run1", 2030) == "ny_nyseg_run1_y2030_mixed" + + def test_with_double_underscore_suffix(self) -> None: + assert ( + _update_run_name("ny_nyseg_run1_up00_precalc__flat", 2030) + == "ny_nyseg_run1_up00_precalc_y2030_mixed__flat" + ) + + +class TestMainBaseline: + def test_generates_correct_count(self, tmp_path: Path) -> None: + path_base, path_adopt, path_mat, path_out = _make_test_inputs(tmp_path) + main( + [ + "--base-scenario", + str(path_base), + "--runs", + "1,2", + "--adoption-config", + str(path_adopt), + "--materialized-dir", + str(path_mat), + "--output", + str(path_out), + ] + ) + result = yaml.safe_load(path_out.read_text()) + assert len(result["runs"]) == 4 # 2 years x 2 runs + + def test_output_keys(self, tmp_path: Path) -> None: + path_base, 
path_adopt, path_mat, path_out = _make_test_inputs(tmp_path) + main( + [ + "--base-scenario", + str(path_base), + "--runs", + "1,2", + "--adoption-config", + str(path_adopt), + "--materialized-dir", + str(path_mat), + "--output", + str(path_out), + ] + ) + result = yaml.safe_load(path_out.read_text()) + assert set(result["runs"].keys()) == {101, 102, 201, 202} + + def test_year_token_replaced_in_dist_mc_path(self, tmp_path: Path) -> None: + path_base, path_adopt, path_mat, path_out = _make_test_inputs(tmp_path) + main( + [ + "--base-scenario", + str(path_base), + "--runs", + "1", + "--adoption-config", + str(path_adopt), + "--materialized-dir", + str(path_mat), + "--output", + str(path_out), + ] + ) + result = yaml.safe_load(path_out.read_text()) + assert "year=2030" in result["runs"][201]["path_dist_and_sub_tx_mc"] + assert "year=2025" not in result["runs"][201]["path_dist_and_sub_tx_mc"] + + +class TestMainResidualCostFrac: + def _run_with_frac(self, tmp_path: Path, frac: str = "0.0") -> dict[str, Any]: + path_base, path_adopt, path_mat, path_out = _make_test_inputs(tmp_path) + main( + [ + "--base-scenario", + str(path_base), + "--runs", + "1,2", + "--adoption-config", + str(path_adopt), + "--materialized-dir", + str(path_mat), + "--output", + str(path_out), + "--residual-cost-frac", + frac, + ] + ) + return yaml.safe_load(path_out.read_text()) + + def test_frac_present_in_all_entries(self, tmp_path: Path) -> None: + for entry in self._run_with_frac(tmp_path)["runs"].values(): + assert entry.get("residual_cost_frac") == pytest.approx(0.0) + + def test_utility_revenue_requirement_none(self, tmp_path: Path) -> None: + for entry in self._run_with_frac(tmp_path)["runs"].values(): + assert entry.get("utility_revenue_requirement") is None + + def test_custom_frac_value(self, tmp_path: Path) -> None: + for entry in self._run_with_frac(tmp_path, frac="0.1")["runs"].values(): + assert entry.get("residual_cost_frac") == pytest.approx(0.1) + + def 
test_no_flag_leaves_field_absent(self, tmp_path: Path) -> None: + path_base, path_adopt, path_mat, path_out = _make_test_inputs(tmp_path) + main( + [ + "--base-scenario", + str(path_base), + "--runs", + "1", + "--adoption-config", + str(path_adopt), + "--materialized-dir", + str(path_mat), + "--output", + str(path_out), + ] + ) + result = yaml.safe_load(path_out.read_text()) + for entry in result["runs"].values(): + assert "residual_cost_frac" not in entry + + +class TestMainCambiumSupply: + def _run(self, tmp_path: Path, runs: str = "1,2") -> dict[str, Any]: + path_base, path_adopt, path_mat, path_out = _make_test_inputs(tmp_path) + main( + [ + "--base-scenario", + str(path_base), + "--runs", + runs, + "--adoption-config", + str(path_adopt), + "--materialized-dir", + str(path_mat), + "--output", + str(path_out), + "--cambium-supply", + "--cambium-gea", + "NYISO", + "--cambium-ba", + "p127", + ] + ) + return yaml.safe_load(path_out.read_text()) + + def test_supply_run_gets_cambium_energy_path(self, tmp_path: Path) -> None: + run_102 = self._run(tmp_path)["runs"][102] + assert "data.sb/nrel/cambium" in run_102["path_supply_energy_mc"] + assert "MidCase" in run_102["path_supply_energy_mc"] + + def test_supply_run_gets_cambium_capacity_path(self, tmp_path: Path) -> None: + assert ( + "data.sb/nrel/cambium" + in self._run(tmp_path)["runs"][102]["path_supply_capacity_mc"] + ) + + def test_delivery_run_supply_paths_not_overwritten(self, tmp_path: Path) -> None: + run_101 = self._run(tmp_path)["runs"][101] + assert "data.sb/nrel/cambium" not in run_101["path_supply_energy_mc"] + assert "data.sb/nrel/cambium" not in run_101["path_supply_capacity_mc"] + + def test_bulk_tx_cleared_for_all_runs(self, tmp_path: Path) -> None: + for entry in self._run(tmp_path)["runs"].values(): + assert entry.get("path_bulk_tx_mc") == "" + + def test_cambium_path_uses_correct_year(self, tmp_path: Path) -> None: + run_202 = self._run(tmp_path)["runs"][202] + assert "t=2030" in 
run_202["path_supply_energy_mc"] + assert "t=2025" not in run_202["path_supply_energy_mc"] + + def test_cambium_path_includes_gea_and_ba(self, tmp_path: Path) -> None: + path = self._run(tmp_path)["runs"][102]["path_supply_energy_mc"] + assert "gea=NYISO" in path + assert "r=p127" in path + + def test_cambium_supply_without_ba_raises(self, tmp_path: Path) -> None: + path_base, path_adopt, path_mat, path_out = _make_test_inputs(tmp_path) + with pytest.raises(ValueError, match="--cambium-ba"): + main( + [ + "--base-scenario", + str(path_base), + "--runs", + "1", + "--adoption-config", + str(path_adopt), + "--materialized-dir", + str(path_mat), + "--output", + str(path_out), + "--cambium-supply", + ] + ) + + +class TestMainCambiumDistMcBase: + CAMBIUM_BASE = "s3://data.sb/switchbox/marginal_costs/ny/cambium_dist_and_sub_tx" + + def _run(self, tmp_path: Path) -> dict[str, Any]: + path_base, path_adopt, path_mat, path_out = _make_test_inputs(tmp_path) + main( + [ + "--base-scenario", + str(path_base), + "--runs", + "1", + "--adoption-config", + str(path_adopt), + "--materialized-dir", + str(path_mat), + "--output", + str(path_out), + "--cambium-dist-mc-base", + self.CAMBIUM_BASE, + ] + ) + return yaml.safe_load(path_out.read_text()) + + def test_dist_mc_path_uses_cambium_base(self, tmp_path: Path) -> None: + for entry in self._run(tmp_path)["runs"].values(): + assert "cambium_dist_and_sub_tx" in entry["path_dist_and_sub_tx_mc"] + + def test_dist_mc_path_contains_utility_partition(self, tmp_path: Path) -> None: + for entry in self._run(tmp_path)["runs"].values(): + assert "utility=nyseg" in entry["path_dist_and_sub_tx_mc"] + + def test_dist_mc_path_year_matches_run_year(self, tmp_path: Path) -> None: + result = self._run(tmp_path) + assert "year=2025" in result["runs"][101]["path_dist_and_sub_tx_mc"] + assert "year=2030" in result["runs"][201]["path_dist_and_sub_tx_mc"] + + def test_dist_mc_path_ends_with_data_parquet(self, tmp_path: Path) -> None: + for entry in 
self._run(tmp_path)["runs"].values(): + assert entry["path_dist_and_sub_tx_mc"].endswith("/data.parquet") + + +class TestMainTTokenReplacement: + """End-to-end: base runs with Cambium t= paths get tokens updated per year.""" + + def test_t_token_replaced_in_supply_mc_paths(self, tmp_path: Path) -> None: + path_base = tmp_path / "scenarios_nyseg.yaml" + _write_yaml(path_base, BASE_RUNS_WITH_CAMBIUM_T) + path_adopt = tmp_path / "adoption.yaml" + _write_yaml(path_adopt, ADOPTION_CONFIG) + path_mat = tmp_path / "materialized" + for yr in [2025, 2030]: + (path_mat / f"year={yr}").mkdir(parents=True) + path_out = tmp_path / "out.yaml" + main( + [ + "--base-scenario", + str(path_base), + "--runs", + "1", + "--adoption-config", + str(path_adopt), + "--materialized-dir", + str(path_mat), + "--output", + str(path_out), + ] + ) + result = yaml.safe_load(path_out.read_text()) + # key 101: year_index 0 (2025) - t= should stay t=2025 + assert "t=2025" in result["runs"][101]["path_supply_energy_mc"] + # key 201: year_index 1 (2030) - t= should be updated to t=2030 + assert "t=2030" in result["runs"][201]["path_supply_energy_mc"] + assert "t=2025" not in result["runs"][201]["path_supply_energy_mc"] From 95b6108d207ae50fa6ad921a45b77b6d2c1c797c Mon Sep 17 00:00:00 2001 From: sherryzuo <145484385+sherryzuo@users.noreply.github.com> Date: Mon, 23 Mar 2026 17:15:54 +0000 Subject: [PATCH 14/19] Apply prek auto-formatting fixes - data/resstock/Justfile: remove trailing whitespace - nyca_electrification.yaml: yamlfmt blank line cleanup - run_scenario.py: ruff line-length reformat - tests/pre/__init__.py: remove spurious blank line - fit_adoption_config.py: ruff line-length reformat --- data/resstock/Justfile | 3 ++- .../ny/config/adoption/nyca_electrification.yaml | 11 ++++------- rate_design/hp_rates/run_scenario.py | 6 +++++- tests/pre/__init__.py | 1 - utils/pre/fit_adoption_config.py | 12 +++++++----- 5 files changed, 18 insertions(+), 15 deletions(-) diff --git 
a/data/resstock/Justfile b/data/resstock/Justfile index c1b5dc4d..6efe709b 100644 --- a/data/resstock/Justfile +++ b/data/resstock/Justfile @@ -321,7 +321,8 @@ adjust-mf-electricity-NY-upgrade-05: # Copy, adjust loads, and sync upgrades 01, 04, 05 into the _sb release for NY. # Assumes prepare-metadata-ny has already been run (it processes all upgrades 00-05). # metadata_utility (utility assignment) is upgrade-independent and is not re-copied here. -# We are NOT running approximate-non-hp-load for upgrades 4 and 5 because they strictly only apply to certain building types. + +# We are NOT running approximate-non-hp-load for upgrades 4 and 5 because they strictly only apply to certain building types. create-sb-release-for-adoption-upgrades-NY: just copy-resstock-data-2024-amy2018-2-NY "04 05" "metadata load_curve_hourly" just approximate-non-hp-load NY 01 res_2024_amy2018_2 res_2024_amy2018_2_sb 15 True True diff --git a/rate_design/hp_rates/ny/config/adoption/nyca_electrification.yaml b/rate_design/hp_rates/ny/config/adoption/nyca_electrification.yaml index aace3313..e3eaba98 100644 --- a/rate_design/hp_rates/ny/config/adoption/nyca_electrification.yaml +++ b/rate_design/hp_rates/ny/config/adoption/nyca_electrification.yaml @@ -23,16 +23,13 @@ # upgrade 4 (ASHP dual fuel): L=0.1087 k=0.2290 t0=2040.0 # upgrade 5 (ground source HP): L=0.0115 k=0.2633 t0=2039.3 # upgrade 1 (supplemental heat): L=0.1281 k=0.3098 t0=2040.2 - scenario_name: nyca_electrification random_seed: 42 - scenario: - 2: [0.0000, 0.0128, 0.0339, 0.0767, 0.1340, 0.1794] # ASHP full capacity - 4: [0.0000, 0.0100, 0.0263, 0.0544, 0.0825, 0.0987] # ASHP dual fuel - 5: [0.0000, 0.0009, 0.0028, 0.0063, 0.0094, 0.0109] # ground source HP - 1: [0.0000, 0.0052, 0.0211, 0.0617, 0.1043, 0.1222] # supplemental heat - + 2: [0.0000, 0.0128, 0.0339, 0.0767, 0.1340, 0.1794] # ASHP full capacity + 4: [0.0000, 0.0100, 0.0263, 0.0544, 0.0825, 0.0987] # ASHP dual fuel + 5: [0.0000, 0.0009, 0.0028, 0.0063, 0.0094, 
0.0109] # ground source HP + 1: [0.0000, 0.0052, 0.0211, 0.0617, 0.1043, 0.1222] # supplemental heat # Calendar years for each scenario index (= run years). # Aligns with Cambium 5-year MC intervals; 2025 is baseline. year_labels: [2025, 2030, 2035, 2040, 2045, 2050] diff --git a/rate_design/hp_rates/run_scenario.py b/rate_design/hp_rates/run_scenario.py index 4bec325a..38516d0a 100644 --- a/rate_design/hp_rates/run_scenario.py +++ b/rate_design/hp_rates/run_scenario.py @@ -247,7 +247,11 @@ def _build_settings_from_yaml_run( if residual_cost_frac_raw is not None: residual_cost_frac = _parse_float(residual_cost_frac_raw, "residual_cost_frac") urr_raw = run.get("utility_revenue_requirement") - urr_present = urr_raw is not None and str(urr_raw).strip() not in ("", "none", "null") + urr_present = urr_raw is not None and str(urr_raw).strip() not in ( + "", + "none", + "null", + ) if residual_cost_frac is not None and urr_present: raise ValueError( "Specify exactly one of 'residual_cost_frac' or 'utility_revenue_requirement', " diff --git a/tests/pre/__init__.py b/tests/pre/__init__.py index 8b137891..e69de29b 100644 --- a/tests/pre/__init__.py +++ b/tests/pre/__init__.py @@ -1 +0,0 @@ - diff --git a/utils/pre/fit_adoption_config.py b/utils/pre/fit_adoption_config.py index c312e923..593229b7 100644 --- a/utils/pre/fit_adoption_config.py +++ b/utils/pre/fit_adoption_config.py @@ -89,12 +89,12 @@ def load_total_hu(state: str = "NY", max_year: int | None = None) -> tuple[float storage_options = {"aws_region": get_aws_region()} base = S3Path(_EIA861_S3_BASE) year_dirs = sorted( - int(p.name.split("=")[1]) - for p in base.iterdir() - if p.name.startswith("year=") + int(p.name.split("=")[1]) for p in base.iterdir() if p.name.startswith("year=") ) if not year_dirs: - raise FileNotFoundError(f"No EIA-861 year partitions found at {_EIA861_S3_BASE}") + raise FileNotFoundError( + f"No EIA-861 year partitions found at {_EIA861_S3_BASE}" + ) if max_year is not None: year_dirs = [y 
for y in year_dirs if y <= max_year] if not year_dirs: @@ -381,7 +381,9 @@ def make_plot( # Ordered technology names for the legend (matches NYISO chart order, bottom→top). tech_order = [_UPGRADE_PLOT_LABELS[uid] for uid in [2, 4, 5, 1]] - color_map = {_UPGRADE_PLOT_LABELS[uid]: _UPGRADE_COLORS[uid] for uid in [2, 4, 5, 1]} + color_map = { + _UPGRADE_PLOT_LABELS[uid]: _UPGRADE_COLORS[uid] for uid in [2, 4, 5, 1] + } # Convert to pandas for plotnine; use pandas Categorical for legend order. import pandas as pd # noqa: PLC0415 From 5b8d4bd560f1576dcb467d192af572a73be1fd99 Mon Sep 17 00:00:00 2001 From: sherryzuo <145484385+sherryzuo@users.noreply.github.com> Date: Mon, 23 Mar 2026 21:04:03 +0000 Subject: [PATCH 15/19] Add utils/buildstock and list_adoption_years utility - utils/buildstock: SbMixedUpgradeScenario for CAIRO-facing materialization, re-exports buildstock-fetch scenario helpers - list_adoption_years: print run years from adoption config YAML (respects run_years, snaps to year_labels); used by Justfile for adoption loop --- tests/pre/test_list_adoption_years.py | 71 ++++++++ utils/buildstock.py | 244 ++++++++++++++++++++++++++ utils/pre/list_adoption_years.py | 62 +++++++ 3 files changed, 377 insertions(+) create mode 100644 tests/pre/test_list_adoption_years.py create mode 100644 utils/buildstock.py create mode 100644 utils/pre/list_adoption_years.py diff --git a/tests/pre/test_list_adoption_years.py b/tests/pre/test_list_adoption_years.py new file mode 100644 index 00000000..46d76753 --- /dev/null +++ b/tests/pre/test_list_adoption_years.py @@ -0,0 +1,71 @@ +"""Tests for utils/pre/list_adoption_years.py.""" + +from __future__ import annotations + +import pytest + +from utils.pre.list_adoption_years import list_run_years, main + + +class TestListRunYears: + def test_all_labels_when_run_years_omitted(self) -> None: + config = { + "scenario_name": "test", + "year_labels": [2025, 2030, 2035], + } + assert list_run_years(config) == [2025, 2030, 2035] + + def 
test_run_years_subset(self) -> None: + config = { + "year_labels": [2025, 2030, 2035, 2040], + "run_years": [2025, 2040], + } + assert list_run_years(config) == [2025, 2040] + + def test_run_years_snaps_to_nearest(self) -> None: + config = { + "year_labels": [2025, 2030, 2035], + "run_years": [2027], + } + with pytest.warns(UserWarning, match="snapping"): + result = list_run_years(config) + assert result == [2025] + + def test_empty_year_labels(self) -> None: + config: dict = {"year_labels": []} + assert list_run_years(config) == [] + + def test_single_year(self) -> None: + config = {"year_labels": [2030]} + assert list_run_years(config) == [2030] + + def test_string_year_labels_normalised(self) -> None: + config = {"year_labels": ["2025", "2030"]} + assert list_run_years(config) == [2025, 2030] + + +class TestMain: + def test_prints_one_year_per_line(self, tmp_path, capsys) -> None: + p = tmp_path / "adoption.yaml" + p.write_text( + "scenario_name: test\nyear_labels: [2025, 2030, 2035]\n", + encoding="utf-8", + ) + main([str(p)]) + out = capsys.readouterr().out.strip().splitlines() + assert out == ["2025", "2030", "2035"] + + def test_respects_run_years(self, tmp_path, capsys) -> None: + p = tmp_path / "adoption.yaml" + p.write_text( + "scenario_name: test\nyear_labels: [2025, 2030, 2035]\nrun_years: [2030]\n", + encoding="utf-8", + ) + main([str(p)]) + out = capsys.readouterr().out.strip().splitlines() + assert out == ["2030"] + + def test_missing_arg_exits_nonzero(self, capsys) -> None: + with pytest.raises(SystemExit) as exc: + main([]) + assert exc.value.code != 0 diff --git a/utils/buildstock.py b/utils/buildstock.py new file mode 100644 index 00000000..53b41652 --- /dev/null +++ b/utils/buildstock.py @@ -0,0 +1,244 @@ +"""Buildstock integration for mixed-upgrade adoption materialization. + +This module keeps CAIRO-facing materialization logic in RDP while re-exporting +scenario helpers from `buildstock-fetch`. 
+""" + +from __future__ import annotations + +import csv +import os +import warnings +from pathlib import Path +from typing import Callable + +import polars as pl +from buildstock_fetch.scenarios import uniform_adoption, validate_scenario + +HAS_HP_COL = "postprocess_group.has_hp" +__all__ = ["SbMixedUpgradeScenario", "validate_scenario", "uniform_adoption"] + + +def _build_load_file_map(loads_dir: Path, bldg_ids: set[int]) -> dict[int, Path]: + """Return `{bldg_id: parquet_path}` for files present in `loads_dir`.""" + file_map: dict[int, Path] = {} + if not loads_dir.exists(): + return file_map + for path in loads_dir.glob("*.parquet"): + parts = path.stem.split("-", maxsplit=1) + if len(parts) != 2: + continue + if not parts[0].isdigit(): + continue + bldg_id = int(parts[0]) + if bldg_id in bldg_ids: + file_map[bldg_id] = path + return file_map + + +class SbMixedUpgradeScenario: + """Materialize mixed-upgrade scenario assignments for CAIRO input layout.""" + + def __init__( + self, + *, + path_resstock_release: Path, + state: str, + scenario_name: str, + scenario: dict[int, list[float]], + random_seed: int, + year_labels: list[int], + run_year_indices: list[int], + ) -> None: + self.path_resstock_release = path_resstock_release + self.state = state.upper() + self.scenario_name = scenario_name + self.scenario = scenario + self.random_seed = random_seed + self.year_labels = year_labels + self.run_year_indices = run_year_indices + self._metadata_cache: dict[int, pl.DataFrame] = {} + + validate_scenario(self.scenario) + + def _path_metadata(self, upgrade_id: int) -> Path: + return ( + self.path_resstock_release + / "metadata" + / f"state={self.state}" + / f"upgrade={upgrade_id:02d}" + / "metadata-sb.parquet" + ) + + def _path_loads(self, upgrade_id: int) -> Path: + return ( + self.path_resstock_release + / "load_curve_hourly" + / f"state={self.state}" + / f"upgrade={upgrade_id:02d}" + ) + + def _read_metadata(self, upgrade_id: int) -> pl.DataFrame: + cached = 
self._metadata_cache.get(upgrade_id) + if cached is not None: + return cached + + path = self._path_metadata(upgrade_id) + if not path.exists(): + raise FileNotFoundError( + f"Missing metadata file for upgrade={upgrade_id:02d}: {path}" + ) + df = pl.read_parquet(path) + self._metadata_cache[upgrade_id] = df + return df + + def compute_hp_pools( + self, + ) -> tuple[frozenset[int], frozenset[int], dict[int, set[int]]]: + """Return `(all_ids, eligible_ids, applicable_by_upgrade)`.""" + baseline_df = self._read_metadata(0) + all_ids = frozenset(baseline_df["bldg_id"].to_list()) + + if HAS_HP_COL in baseline_df.columns: + eligible = frozenset( + baseline_df.filter(pl.col(HAS_HP_COL) != True)["bldg_id"].to_list() # noqa: E712 + ) + else: + eligible = all_ids + + applicable_by_upgrade: dict[int, set[int]] = {} + for upgrade_id in sorted(self.scenario.keys()): + upgrade_df = self._read_metadata(upgrade_id) + if HAS_HP_COL not in upgrade_df.columns: + warnings.warn( + f"Upgrade {upgrade_id}: '{HAS_HP_COL}' missing; using full eligible pool.", + stacklevel=2, + ) + applicable_by_upgrade[upgrade_id] = set(eligible) + continue + applicable_ids = set( + upgrade_df.filter(pl.col(HAS_HP_COL) == True)["bldg_id"].to_list() # noqa: E712 + ) + applicable_by_upgrade[upgrade_id] = applicable_ids & set(eligible) + + return all_ids, eligible, applicable_by_upgrade + + def build_assignments( + self, + assign_buildings: Callable[ + [ + list[int], + dict[int, list[float]], + list[int], + int, + dict[int, set[int]] | None, + ], + dict[int, dict[int, int]], + ], + ) -> dict[int, dict[int, int]]: + """Build year-wise assignments including baseline-pinned buildings.""" + all_ids, eligible_ids, applicable_by_upgrade = self.compute_hp_pools() + + year_indices = list(range(len(self.year_labels))) + eligible_assignments = assign_buildings( + sorted(eligible_ids), + self.scenario, + year_indices, + self.random_seed, + applicable_by_upgrade, + ) + + full_assignments: dict[int, dict[int, int]] = {} + 
for year_idx in year_indices: + full_year = {bldg_id: 0 for bldg_id in all_ids} + full_year.update(eligible_assignments[year_idx]) + full_assignments[year_idx] = full_year + return full_assignments + + def materialize( + self, *, path_output_dir: Path, assignments: dict[int, dict[int, int]] + ) -> None: + """Write `metadata-sb.parquet` and load symlinks per run year.""" + path_output_dir.mkdir(parents=True, exist_ok=True) + + upgrades = [0, *sorted(self.scenario.keys())] + for upgrade_id in upgrades: + self._read_metadata(upgrade_id) + + for year_idx in self.run_year_indices: + calendar_year = self.year_labels[year_idx] + year_dir = path_output_dir / f"year={calendar_year}" + year_dir.mkdir(parents=True, exist_ok=True) + # All buildings land in a single upgrade=00 partition so that + # scan_resstock_loads (hive-partitioned) and CAIRO can share the + # same base path. The symlink targets still point at the correct + # per-building upgrade source files. + loads_out_dir = ( + year_dir / "load_curve_hourly" / f"state={self.state}" / "upgrade=00" + ) + loads_out_dir.mkdir(parents=True, exist_ok=True) + year_map = assignments[year_idx] + + bldgs_by_upgrade: dict[int, set[int]] = { + upgrade_id: set() for upgrade_id in upgrades + } + for bldg_id, upgrade_id in year_map.items(): + bldgs_by_upgrade.setdefault(upgrade_id, set()).add(bldg_id) + + metadata_parts: list[pl.DataFrame] = [] + for upgrade_id in upgrades: + bldg_ids = bldgs_by_upgrade[upgrade_id] + if not bldg_ids: + continue + + metadata_parts.append( + self._metadata_cache[upgrade_id].filter( + pl.col("bldg_id").is_in(sorted(bldg_ids)) + ) + ) + + loads_dir = self._path_loads(upgrade_id) + if not loads_dir.exists(): + raise FileNotFoundError( + f"Missing loads directory for upgrade={upgrade_id:02d}: {loads_dir}" + ) + load_map = _build_load_file_map(loads_dir, bldg_ids) + missing = sorted(bldg_ids - set(load_map.keys())) + if missing: + raise FileNotFoundError( + f"Missing load parquet(s) for 
upgrade={upgrade_id:02d}; " + f"first missing bldg_id={missing[0]}" + ) + for src in load_map.values(): + dst = loads_out_dir / src.name + if dst.exists() or dst.is_symlink(): + dst.unlink() + os.symlink(src.resolve(), dst) + + if not metadata_parts: + raise ValueError(f"No metadata rows found for year index {year_idx}") + + metadata_df = pl.concat(metadata_parts, how="diagonal_relaxed").sort("bldg_id") + metadata_df.write_parquet(year_dir / "metadata-sb.parquet") + + def export_scenario_csv( + self, *, path_output_dir: Path, assignments: dict[int, dict[int, int]] + ) -> None: + """Write `scenario_assignments.csv` in the existing format.""" + path_output_dir.mkdir(parents=True, exist_ok=True) + bldg_ids = sorted(assignments[0].keys()) + csv_path = path_output_dir / "scenario_assignments.csv" + + with open(csv_path, "w", encoding="utf-8", newline="") as f: + writer = csv.writer(f) + writer.writerow(["bldg_id", *[f"year_{y}" for y in self.year_labels]]) + for bldg_id in bldg_ids: + writer.writerow( + [ + bldg_id, + *[ + assignments[year_idx][bldg_id] + for year_idx in range(len(self.year_labels)) + ], + ] + ) diff --git a/utils/pre/list_adoption_years.py b/utils/pre/list_adoption_years.py new file mode 100644 index 00000000..6720fef0 --- /dev/null +++ b/utils/pre/list_adoption_years.py @@ -0,0 +1,62 @@ +"""Print one calendar year per line for each run year in an adoption config YAML. + +Respects ``run_years`` when present; otherwise uses all ``year_labels``. +Snaps ``run_years`` entries to the nearest ``year_label`` (matching the logic +in ``materialize_mixed_upgrade`` and ``generate_adoption_scenario_yamls``). + +Usage:: + + uv run python utils/pre/list_adoption_years.py path/to/config.yaml + +Exit code 0; prints one integer per line to stdout. Supersedes +``count_adoption_years.py`` — callers get the actual years and the count +implicitly via ``${#year_list[@]}`` in bash. 
+""" + +from __future__ import annotations + +import sys +import warnings +from pathlib import Path + +import numpy as np +import yaml + + +def list_run_years(config: dict) -> list[int]: + """Return the ordered list of calendar years to run, honouring ``run_years``.""" + year_labels: list[int] = [int(y) for y in config.get("year_labels", [])] + run_years_raw: list[int] | None = config.get("run_years") + + if run_years_raw is None: + return year_labels + + result: list[int] = [] + for yr in run_years_raw: + distances = [abs(yl - int(yr)) for yl in year_labels] + nearest_idx = int(np.argmin(distances)) + nearest_year = year_labels[nearest_idx] + if nearest_year != int(yr): + warnings.warn( + f"run_years entry {yr} not in year_labels; " + f"snapping to {nearest_year} (index {nearest_idx})", + stacklevel=2, + ) + result.append(nearest_year) + return result + + +def main(args: list[str] | None = None) -> None: + argv = args if args is not None else sys.argv[1:] + if not argv: + print("usage: list_adoption_years.py ", file=sys.stderr) + sys.exit(1) + path = Path(argv[0]) + with path.open(encoding="utf-8") as f: + cfg = yaml.safe_load(f) + for year in list_run_years(cfg): + print(year) + + +if __name__ == "__main__": + main() From 156166982f0a5f78542a36c14744106cc4cc822c Mon Sep 17 00:00:00 2001 From: sherryzuo <145484385+sherryzuo@users.noreply.github.com> Date: Mon, 23 Mar 2026 21:04:09 +0000 Subject: [PATCH 16/19] Refactor materialize_mixed_upgrade to use utils.buildstock - Delegate scenario materialization to SbMixedUpgradeScenario - Add --release for release dir under path-resstock-release - Simplify CLI and config parsing; trim docstring --- tests/pre/test_materialize_mixed_upgrade.py | 194 +++++++- utils/pre/materialize_mixed_upgrade.py | 514 +++++--------------- 2 files changed, 290 insertions(+), 418 deletions(-) diff --git a/tests/pre/test_materialize_mixed_upgrade.py b/tests/pre/test_materialize_mixed_upgrade.py index 33aa494f..59e2cce6 100644 --- 
a/tests/pre/test_materialize_mixed_upgrade.py +++ b/tests/pre/test_materialize_mixed_upgrade.py @@ -9,6 +9,7 @@ import pytest from buildstock_fetch.scenarios import InvalidScenarioError, validate_scenario +from utils.buildstock import SbMixedUpgradeScenario from utils.pre.materialize_mixed_upgrade import ( _build_load_file_map, _parse_adoption_config, @@ -251,24 +252,25 @@ def test_applicable_smaller_than_target_warns_and_caps(self) -> None: def test_overlapping_sets_no_double_assignment(self) -> None: """When applicable sets overlap, each building gets at most one upgrade.""" bldg_ids = _bldg_ids(100) - # Both upgrades applicable to all 100 buildings; upgrade 2 (lower ID) gets - # first pick and claims all buildings, leaving upgrade 4 with an empty pool. + # Both upgrades applicable to all 100 buildings. applicable = {2: set(range(1, 101)), 4: set(range(1, 101))} scenario = {2: [0.20], 4: [0.15]} - with pytest.warns(UserWarning, match="Upgrade 4"): - assignments = assign_buildings( - bldg_ids, - scenario, - [0], - random_seed=0, - applicable_bldg_ids_per_upgrade=applicable, - ) + assignments = assign_buildings( + bldg_ids, + scenario, + [0], + random_seed=0, + applicable_bldg_ids_per_upgrade=applicable, + ) for bid, uid in assignments[0].items(): assert uid in {0, 2, 4}, f"bldg {bid} assigned unknown upgrade {uid}" # No building should be in two upgrade pools assigned_to_2 = {bid for bid, u in assignments[0].items() if u == 2} assigned_to_4 = {bid for bid, u in assignments[0].items() if u == 4} assert assigned_to_2.isdisjoint(assigned_to_4) + # Later upgrades overwrite overlapping assignments from earlier upgrades. 
+ assert len(assigned_to_4) == 15 + assert len(assigned_to_2) + len(assigned_to_4) == 20 def test_monotonicity_preserved_with_applicability(self) -> None: """Monotonic adoption holds when using applicable_bldg_ids_per_upgrade.""" @@ -518,31 +520,40 @@ def fs_and_out(self, tmp_path: Path) -> tuple[Path, Path]: ) return release, out_dir + def _loads_dir(self, out_dir: Path, year: int = 2025, state: str = "RI") -> Path: + return ( + out_dir + / f"year={year}" + / "load_curve_hourly" + / f"state={state}" + / "upgrade=00" + ) + def test_loads_dir_exists(self, fs_and_out: tuple[Path, Path]) -> None: _, out_dir = fs_and_out - assert (out_dir / "year=2025" / "loads").is_dir() + assert self._loads_dir(out_dir).is_dir() def test_symlink_count_equals_building_count( self, fs_and_out: tuple[Path, Path] ) -> None: _, out_dir = fs_and_out - links = list((out_dir / "year=2025" / "loads").iterdir()) + links = list(self._loads_dir(out_dir).iterdir()) assert len(links) == 5 def test_symlinks_are_actual_symlinks(self, fs_and_out: tuple[Path, Path]) -> None: _, out_dir = fs_and_out - for p in (out_dir / "year=2025" / "loads").iterdir(): + for p in self._loads_dir(out_dir).iterdir(): assert p.is_symlink(), f"{p} is not a symlink" def test_symlink_targets_exist(self, fs_and_out: tuple[Path, Path]) -> None: release, out_dir = fs_and_out - for p in (out_dir / "year=2025" / "loads").iterdir(): + for p in self._loads_dir(out_dir).iterdir(): assert p.resolve().exists(), f"Dangling symlink: {p}" def test_symlink_filename_convention(self, fs_and_out: tuple[Path, Path]) -> None: """All symlink names follow {bldg_id}-{upgrade_id}.parquet.""" _, out_dir = fs_and_out - for p in (out_dir / "year=2025" / "loads").iterdir(): + for p in self._loads_dir(out_dir).iterdir(): stem = p.stem # e.g. 
"3-02" parts = stem.split("-", maxsplit=1) assert len(parts) == 2, f"Unexpected filename: {p.name}" @@ -555,8 +566,7 @@ def test_assigned_buildings_link_to_correct_upgrade( ) -> None: """Buildings assigned to upgrade 2 must symlink to upgrade=02 load files.""" release, out_dir = fs_and_out - loads_dir = out_dir / "year=2025" / "loads" - for link in loads_dir.iterdir(): + for link in self._loads_dir(out_dir).iterdir(): target = link.resolve() # The upgrade_id is encoded in the filename (e.g. "3-02.parquet"). stem = link.stem @@ -566,6 +576,14 @@ def test_assigned_buildings_link_to_correct_upgrade( f"Symlink {link.name} → {target} does not point into {expected_upgrade_dir}" ) + def test_hive_partition_path(self, fs_and_out: tuple[Path, Path]) -> None: + """Symlinks live under load_curve_hourly/state=RI/upgrade=00/ (hive layout).""" + _, out_dir = fs_and_out + hive_dir = self._loads_dir(out_dir) + assert hive_dir.is_dir() + # No flat 'loads/' directory should exist. + assert not (out_dir / "year=2025" / "loads").exists() + # --------------------------------------------------------------------------- # 6. Scenario CSV output @@ -676,7 +694,7 @@ def test_missing_upgrade_metadata_raises(self, tmp_path: Path) -> None: adoption_yaml = tmp_path / "adoption.yaml" adoption_yaml.write_text( - "scenario_name: t\nrandom_seed: 0\nscenario:\n 2: [0.10]\n" + "scenario_name: t\nrandom_seed: 0\nscenario:\n 2: [1.00]\n" "year_labels: [2025]\n", encoding="utf-8", ) @@ -699,6 +717,7 @@ def test_missing_upgrade_metadata_raises(self, tmp_path: Path) -> None: def test_missing_loads_dir_raises(self, tmp_path: Path) -> None: release = tmp_path / "release" # Create metadata for both upgrades but omit loads dir for upgrade=02. 
+ bldg_ids = [1, 2] for uid in [0, 2]: meta = ( release @@ -707,13 +726,16 @@ def test_missing_loads_dir_raises(self, tmp_path: Path) -> None: / f"upgrade={uid:02d}" / "metadata-sb.parquet" ) - _write_metadata(meta, _make_metadata_df([1, 2])) + has_hp = [False] * len(bldg_ids) if uid == 0 else [True] * len(bldg_ids) + _write_metadata(meta, _make_metadata_df(bldg_ids, has_hp)) loads_dir_0 = release / "load_curve_hourly" / "state=NY" / "upgrade=00" loads_dir_0.mkdir(parents=True) + for bid in bldg_ids: + _touch_load_file(loads_dir_0, bid, 0) adoption_yaml = tmp_path / "adoption.yaml" adoption_yaml.write_text( - "scenario_name: t\nrandom_seed: 0\nscenario:\n 2: [0.10]\n" + "scenario_name: t\nrandom_seed: 0\nscenario:\n 2: [1.00]\n" "year_labels: [2025]\n", encoding="utf-8", ) @@ -906,3 +928,135 @@ def test_empty_directory_returns_empty(self, tmp_path: Path) -> None: d.mkdir() result = _build_load_file_map(d, {1, 2}) assert result == {} + + +# --------------------------------------------------------------------------- +# 10. 
SbMixedUpgradeScenario wrapper behavior +# --------------------------------------------------------------------------- + + +class TestSbMixedUpgradeScenario: + def test_release_mapping_via_main_uses_subdir(self, tmp_path: Path) -> None: + root = tmp_path / "resstock_root" + release_name = "res_2024_amy2018_2_sb" + release = root / release_name + bldg_ids = [1, 2, 3, 4] + + for uid in [0, 2]: + meta = ( + release + / "metadata" + / "state=NY" + / f"upgrade={uid:02d}" + / "metadata-sb.parquet" + ) + has_hp = [False] * len(bldg_ids) if uid == 0 else [True] * len(bldg_ids) + _write_metadata(meta, _make_metadata_df(bldg_ids, has_hp)) + loads = release / "load_curve_hourly" / "state=NY" / f"upgrade={uid:02d}" + for bid in bldg_ids: + _touch_load_file(loads, bid, uid) + + adoption_yaml = tmp_path / "adoption.yaml" + adoption_yaml.write_text( + "scenario_name: test\nrandom_seed: 0\nscenario:\n 2: [0.5]\n" + "year_labels: [2025]\n", + encoding="utf-8", + ) + + out_dir = tmp_path / "out" + main( + [ + "--state", + "ny", + "--utility", + "test", + "--adoption-config", + str(adoption_yaml), + "--path-resstock-release", + str(root), + "--release", + release_name, + "--output-dir", + str(out_dir), + ] + ) + assert (out_dir / "year=2025" / "metadata-sb.parquet").exists() + + def test_uses_metadata_sb_variant(self, tmp_path: Path) -> None: + release = tmp_path / "release" + bldg_ids = [1, 2, 3] + + for uid in [0, 2]: + meta = ( + release + / "metadata" + / "state=RI" + / f"upgrade={uid:02d}" + / "metadata-sb.parquet" + ) + has_hp = [False] * len(bldg_ids) if uid == 0 else [True] * len(bldg_ids) + _write_metadata(meta, _make_metadata_df(bldg_ids, has_hp)) + loads = release / "load_curve_hourly" / "state=RI" / f"upgrade={uid:02d}" + for bid in bldg_ids: + _touch_load_file(loads, bid, uid) + + adoption_yaml = tmp_path / "adoption.yaml" + adoption_yaml.write_text( + "scenario_name: test\nrandom_seed: 0\nscenario:\n 2: [0.34]\n" + "year_labels: [2025]\n", + encoding="utf-8", + ) + + 
out_dir = tmp_path / "out" + main( + [ + "--state", + "ri", + "--utility", + "test", + "--adoption-config", + str(adoption_yaml), + "--path-resstock-release", + str(release), + "--output-dir", + str(out_dir), + ] + ) + assert (out_dir / "year=2025" / "metadata-sb.parquet").exists() + + def test_hp_filtering_integration_baseline_pin_and_applicability( + self, tmp_path: Path + ) -> None: + release = tmp_path / "release" + bldg_ids = [1, 2, 3, 4, 5, 6] + baseline_has_hp = [False, False, False, True, True, True] + upgrade_2_has_hp = [True, True, False, False, False, False] + + _write_metadata( + release / "metadata" / "state=NY" / "upgrade=00" / "metadata-sb.parquet", + _make_metadata_df(bldg_ids, baseline_has_hp), + ) + _write_metadata( + release / "metadata" / "state=NY" / "upgrade=02" / "metadata-sb.parquet", + _make_metadata_df(bldg_ids, upgrade_2_has_hp), + ) + + mixed = SbMixedUpgradeScenario( + path_resstock_release=release, + state="ny", + scenario_name="test", + scenario={2: [0.5]}, + random_seed=0, + year_labels=[2025], + run_year_indices=[0], + ) + assignments = mixed.build_assignments(assign_buildings) + year0 = assignments[0] + + # Baseline-HP buildings must stay on upgrade 0. + for bldg_id in [4, 5, 6]: + assert year0[bldg_id] == 0 + + # Newly assigned HP buildings must come from upgrade-specific applicability. + assigned_to_2 = {bid for bid, uid in year0.items() if uid == 2} + assert assigned_to_2.issubset({1, 2}) diff --git a/utils/pre/materialize_mixed_upgrade.py b/utils/pre/materialize_mixed_upgrade.py index 1f52e827..a69a12f1 100644 --- a/utils/pre/materialize_mixed_upgrade.py +++ b/utils/pre/materialize_mixed_upgrade.py @@ -1,56 +1,20 @@ -"""Materialize per-year ResStock data for mixed-upgrade HP adoption trajectories. 
- -Reads an adoption config YAML (scenario fractions per upgrade per year), assigns -buildings to upgrades using a monotonic random-seed allocation, and writes one -directory per run year containing: - -- ``metadata-sb.parquet``: combined metadata rows from the assigned upgrades. -- ``loads/``: directory of symlinks pointing each building to the correct - upgrade's load parquet (``{bldg_id}-{N}.parquet``). - -The output mirrors the layout that ``run_scenario.py`` already expects for a -single-upgrade run, so no changes are needed to the scenario runner. - -Building assignment algorithm ------------------------------- -Buildings are shuffled once using the adoption config's ``random_seed``. Each -upgrade is pre-allocated a contiguous band of slots in the shuffled order (based -on its maximum fraction across all years, which is the last year's fraction since -fractions are non-decreasing). At year *t*, the first ``int(N × f[u][t])`` -buildings in upgrade *u*'s band are assigned to that upgrade; the rest remain at -upgrade 0 (baseline). This guarantees: - -- No building is assigned to more than one upgrade at a time. -- Once a building adopts an upgrade, it never reverts (monotonicity). -- The total assigned fraction never exceeds 1.0 (enforced by ``validate_scenario``). 
- -Usage ------ -:: - - uv run python utils/pre/materialize_mixed_upgrade.py \\ - --state ri \\ - --utility rie \\ - --adoption-config rate_design/hp_rates/ny/config/adoption/nyca_electrification.yaml \\ - --path-resstock-release /ebs/data/nrel/resstock/res_2024_amy2018_2_sb \\ - --output-dir /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification -""" +"""Materialize per-year ResStock data for mixed-upgrade HP adoption trajectories.""" from __future__ import annotations import argparse -import csv -import os import sys import warnings from pathlib import Path from typing import Any import numpy as np -import polars as pl import yaml -from buildstock_fetch.scenarios import validate_scenario +from utils.buildstock import ( + SbMixedUpgradeScenario, + _build_load_file_map as _buildstock_load_file_map, +) def build_parser() -> argparse.ArgumentParser: @@ -71,7 +35,12 @@ def build_parser() -> argparse.ArgumentParser: p.add_argument( "--path-resstock-release", required=True, - help="Root path of the processed ResStock _sb release (local or s3://).", + help="ResStock release path or root path containing the release.", + ) + p.add_argument( + "--release", + required=False, + help="Optional release directory name under --path-resstock-release.", ) p.add_argument( "--output-dir", @@ -83,11 +52,6 @@ def build_parser() -> argparse.ArgumentParser: return p -# --------------------------------------------------------------------------- -# Adoption config helpers -# --------------------------------------------------------------------------- - - def _load_adoption_config(path: Path) -> dict[str, Any]: with open(path, encoding="utf-8") as f: return yaml.safe_load(f) @@ -96,17 +60,10 @@ def _load_adoption_config(path: Path) -> dict[str, Any]: def _parse_adoption_config( config: dict[str, Any], ) -> tuple[str, int, dict[int, list[float]], list[int], list[int]]: - """Parse and return core fields from the adoption config. 
- - Returns: - (scenario_name, random_seed, scenario, year_labels, run_year_indices) - where ``run_year_indices`` are the indices into ``year_labels`` that - correspond to the years that should be materialized. - """ + """Parse and return core fields from the adoption config.""" scenario_name: str = config["scenario_name"] random_seed: int = int(config.get("random_seed", 42)) - # Keys may come from YAML as integers or strings; normalise to int. scenario_raw: dict[Any, list[float]] = config["scenario"] scenario: dict[int, list[float]] = { int(k): [float(v) for v in vals] for k, vals in scenario_raw.items() @@ -134,387 +91,148 @@ def _parse_adoption_config( return scenario_name, random_seed, scenario, year_labels, run_year_indices -# --------------------------------------------------------------------------- -# Path helpers -# --------------------------------------------------------------------------- - - -def _upgrade_dir_name(upgrade_id: int) -> str: - return f"upgrade={upgrade_id:02d}" - - -def _metadata_path( - path_resstock_release: Path, state_upper: str, upgrade_id: int -) -> Path: - return ( - path_resstock_release - / "metadata" - / f"state={state_upper}" - / _upgrade_dir_name(upgrade_id) - / "metadata-sb.parquet" - ) - - -def _loads_dir(path_resstock_release: Path, state_upper: str, upgrade_id: int) -> Path: - return ( - path_resstock_release - / "load_curve_hourly" - / f"state={state_upper}" - / _upgrade_dir_name(upgrade_id) - ) - - -def _check_upgrade_paths( - path_resstock_release: Path, - state_upper: str, - upgrade_ids: list[int], -) -> None: - """Raise FileNotFoundError listing all missing upgrade metadata paths.""" - missing: list[str] = [] - for uid in upgrade_ids: - p = _metadata_path(path_resstock_release, state_upper, uid) - if not p.exists(): - missing.append(str(p)) - if missing: - raise FileNotFoundError( - "Missing required upgrade metadata files:\n" + "\n".join(missing) - ) - - -def _check_loads_dirs( - path_resstock_release: Path, - 
state_upper: str, - upgrade_ids: list[int], -) -> None: - """Raise FileNotFoundError listing all missing loads directories.""" - missing: list[str] = [] - for uid in upgrade_ids: - d = _loads_dir(path_resstock_release, state_upper, uid) - if not d.is_dir(): - missing.append(str(d)) - if missing: - raise FileNotFoundError( - "Missing required loads directories:\n" + "\n".join(missing) - ) - - -# --------------------------------------------------------------------------- -# Building assignment -# --------------------------------------------------------------------------- +def _build_load_file_map(loads_dir: Path, bldg_ids: set[int]) -> dict[int, Path]: + """Compatibility shim re-exported for existing tests/imports.""" + return _buildstock_load_file_map(loads_dir, bldg_ids) def assign_buildings( - eligible_bldg_ids: list[int], + bldg_ids: list[int], scenario: dict[int, list[float]], run_year_indices: list[int], random_seed: int, applicable_bldg_ids_per_upgrade: dict[int, set[int]] | None = None, ) -> dict[int, dict[int, int]]: - """Assign buildings to upgrades per run-year index. - - Only buildings that do **not** already have a heat pump should be passed - via ``eligible_bldg_ids``. Buildings already at HP in the baseline are - excluded upstream and kept pinned to upgrade 0 in all years. - - Args: - eligible_bldg_ids: Building IDs eligible for HP adoption (i.e. those - whose ``postprocess_group.has_hp`` is not True in upgrade-0 metadata). - scenario: Dict mapping upgrade_id → per-year cumulative adoption fractions. - Fractions are relative to the *total* building population (all upgrades - combined), so the caller is responsible for passing a proportionally - correct subset. - run_year_indices: Indices into the scenario lists to materialise. - random_seed: Seed for reproducible shuffling. - applicable_bldg_ids_per_upgrade: Optional per-upgrade sets of building IDs - that are actually applicable for each upgrade (i.e. 
buildings where - ``postprocess_group.has_hp`` is True in that upgrade's metadata). - When provided, each upgrade draws only from its applicable pool rather - than the full eligible pool, preventing non-applicable buildings (which - carry baseline loads regardless of upgrade assignment) from being counted - as HP adopters. If two upgrades share applicable buildings, earlier - upgrades in sorted order take priority. If ``None``, all eligible - buildings are candidates for every upgrade (original behaviour). - - Returns: - ``{year_index: {bldg_id: upgrade_id}}`` — upgrade 0 means "baseline". - Only covers ``eligible_bldg_ids``; already-HP buildings are not included. - """ - n_total = len(eligible_bldg_ids) - if n_total == 0: - return {t: {} for t in run_year_indices} - - rng = np.random.default_rng(random_seed) - upgrades_sorted = sorted(scenario.keys()) - num_years = len(next(iter(scenario.values()))) - last_t = num_years - 1 - - if applicable_bldg_ids_per_upgrade is not None: - # Build per-upgrade pools restricted to applicable buildings. - # Each eligible building goes to the first upgrade (by sorted ID) for - # which it is applicable, so pools are non-overlapping. 
- eligible_set = set(eligible_bldg_ids) - claimed: set[int] = set() - per_upgrade_pools: dict[int, np.ndarray] = {} - for u in upgrades_sorted: - applicable = applicable_bldg_ids_per_upgrade.get(u, set()) - pool = sorted(applicable & eligible_set - claimed) - arr = np.array(pool, dtype=np.int64) - rng.shuffle(arr) - per_upgrade_pools[u] = arr - claimed.update(pool) - - result: dict[int, dict[int, int]] = {} + """Assign buildings to upgrades by year, preserving monotonic adoption.""" + assignments = {t: {} for t in run_year_indices} + if not bldg_ids: + return assignments + + shuffled = np.array(sorted(bldg_ids), dtype=int) + np.random.default_rng(random_seed).shuffle(shuffled) + shuffled_ids = shuffled.tolist() + + upgrade_order = list(scenario.keys()) + upgrade_allocations: dict[int, list[int]] = {uid: [] for uid in upgrade_order} + + if applicable_bldg_ids_per_upgrade is None: for t in run_year_indices: - assignments: dict[int, int] = {bid: 0 for bid in eligible_bldg_ids} - for u in upgrades_sorted: - pool = per_upgrade_pools[u] - # Fractions are of total eligible population, not just the pool. - count_t = int(n_total * scenario[u][t]) - actual_count = min(count_t, len(pool)) - if actual_count < count_t: + assigned_any = set().union( + *( + set(upgrade_allocations[uid]) + for uid in upgrade_order + if upgrade_allocations[uid] + ) + ) + for uid in upgrade_order: + target = int(len(bldg_ids) * scenario[uid][t]) + current = len(upgrade_allocations[uid]) + needed = max(0, target - current) + if needed == 0: + continue + available = [bid for bid in shuffled_ids if bid not in assigned_any] + take = available[:needed] + upgrade_allocations[uid].extend(take) + assigned_any.update(take) + if len(take) < needed: warnings.warn( - f"Upgrade {u}: target {count_t} buildings " - f"but only {len(pool)} are applicable; " - f"capping at {actual_count}. 
" - "Consider reducing the adoption fraction for this upgrade.", + f"Upgrade {uid}: target {target} buildings but only " + f"{len(available)} available; capping at {current + len(take)}.", stacklevel=2, ) - for i in range(actual_count): - assignments[int(pool[i])] = u - result[t] = assignments - return result - - # Original behaviour: one shuffled array, contiguous non-overlapping bands. - bldg_array = np.array(sorted(eligible_bldg_ids), dtype=np.int64) - rng.shuffle(bldg_array) - - upgrade_offsets: dict[int, int] = {} - cumulative_offset = 0 - for u in upgrades_sorted: - upgrade_offsets[u] = cumulative_offset - max_count = int(n_total * scenario[u][last_t]) - cumulative_offset += max_count - - result = {} - for t in run_year_indices: - assignments = {int(bid): 0 for bid in bldg_array} - for u in upgrades_sorted: - count_t = int(n_total * scenario[u][t]) - offset = upgrade_offsets[u] - for i in range(count_t): - assignments[int(bldg_array[offset + i])] = u - result[t] = assignments + else: + filtered_pools: dict[int, list[int]] = {} + for uid in upgrade_order: + applicable = applicable_bldg_ids_per_upgrade.get(uid, set()) + # Keep per-upgrade pools independent. If pools overlap, final + # assignment below resolves conflicts by iteration order. 
+ filtered_pools[uid] = [bid for bid in shuffled_ids if bid in applicable] - return result + for t in run_year_indices: + for uid in upgrade_order: + target = int(len(bldg_ids) * scenario[uid][t]) + current = len(upgrade_allocations[uid]) + if target <= current: + continue + pool = filtered_pools[uid] + if target > len(pool): + warnings.warn( + f"Upgrade {uid}: target {target} buildings but only " + f"{len(pool)} are applicable; capping at {len(pool)}.", + stacklevel=2, + ) + target = len(pool) + upgrade_allocations[uid].extend(pool[current:target]) + all_ids = set(bldg_ids) + for t in run_year_indices: + year_map = {bid: 0 for bid in bldg_ids} + for uid in upgrade_order: + target = int(len(bldg_ids) * scenario[uid][t]) + for bid in upgrade_allocations[uid][:target]: + year_map[bid] = uid + assigned = set(bid for bid, uid in year_map.items() if uid != 0) + if not assigned.issubset(all_ids): + raise ValueError( + "Internal assignment bug: assigned building outside input set" + ) + assignments[t] = year_map + return assignments -# --------------------------------------------------------------------------- -# Load-file discovery -# --------------------------------------------------------------------------- +def _resolve_release_path(path_resstock_release: Path, release: str | None) -> Path: + """Resolve the on-disk release directory, preferring `_sb` when available.""" + if release: + if path_resstock_release.name in {release, f"{release}_sb"}: + return path_resstock_release + candidate_sb = path_resstock_release / f"{release}_sb" + if candidate_sb.exists(): + return candidate_sb + candidate = path_resstock_release / release + if candidate.exists(): + return candidate + return candidate_sb -def _build_load_file_map(loads_dir: Path, bldg_ids: set[int]) -> dict[int, Path]: - """Scan ``loads_dir`` and return ``{bldg_id: path}`` for each matching building. - - Files are expected to be named ``{bldg_id}-{something}.parquet``. 
Unmatched - files and files whose bldg_id is not in ``bldg_ids`` are silently skipped. - """ - result: dict[int, Path] = {} - for f in loads_dir.glob("*.parquet"): - parts = f.stem.split("-", maxsplit=1) - if not parts: - continue - try: - bldg_id = int(parts[0]) - except ValueError: - continue - if bldg_ids and bldg_id not in bldg_ids: - continue - result[bldg_id] = f - return result - - -# --------------------------------------------------------------------------- -# Main -# --------------------------------------------------------------------------- + # No explicit release: use the provided path as-is. + return path_resstock_release def main(argv: list[str] | None = None) -> None: args = build_parser().parse_args(argv) path_adoption_config = Path(args.path_adoption_config) - path_resstock_release = Path(args.path_resstock_release) path_output_dir = Path(args.path_output_dir) - state_upper = args.state.upper() + state = args.state.lower() + path_release = _resolve_release_path( + Path(args.path_resstock_release), getattr(args, "release", None) + ) - # 1. Load and validate adoption config. config = _load_adoption_config(path_adoption_config) scenario_name, random_seed, scenario, year_labels, run_year_indices = ( _parse_adoption_config(config) ) - validate_scenario(scenario) - - non_baseline_upgrades = sorted(scenario.keys()) - all_upgrades = sorted({0} | set(non_baseline_upgrades)) print( - f"Materialising '{scenario_name}' for state={state_upper}, " - f"utility={args.utility}" + f"Materialising '{scenario_name}' for state={state.upper()}, utility={args.utility}" ) print( - f" upgrades: {all_upgrades} | " + f" upgrades: {[0, *sorted(scenario.keys())]} | " f"years: {[year_labels[t] for t in run_year_indices]}" ) - # 2. Verify all required upgrade directories exist. - _check_upgrade_paths(path_resstock_release, state_upper, all_upgrades) - _check_loads_dirs(path_resstock_release, state_upper, all_upgrades) - - # 3. 
Load baseline metadata; split into HP-eligible and already-HP buildings. - baseline_meta_path = _metadata_path(path_resstock_release, state_upper, 0) - baseline_df = pl.read_parquet(baseline_meta_path) - all_bldg_ids: list[int] = baseline_df["bldg_id"].to_list() - - # Buildings that already heat with a heat pump in the baseline must NOT be - # re-assigned — they are pinned to upgrade 0 in every year. - has_hp_col = "postprocess_group.has_hp" - if has_hp_col in baseline_df.columns: - already_hp_mask = baseline_df[has_hp_col] == True # noqa: E712 - already_hp_bldg_ids: list[int] = baseline_df.filter(already_hp_mask)[ - "bldg_id" - ].to_list() - eligible_bldg_ids: list[int] = baseline_df.filter(~already_hp_mask)[ - "bldg_id" - ].to_list() - else: - already_hp_bldg_ids = [] - eligible_bldg_ids = all_bldg_ids - - print( - f" total buildings (upgrade 0): {len(all_bldg_ids)} " - f"({len(eligible_bldg_ids)} HP-eligible, " - f"{len(already_hp_bldg_ids)} already have HP → kept at upgrade 0)" + mixed = SbMixedUpgradeScenario( + path_resstock_release=path_release, + state=state, + scenario_name=scenario_name, + scenario=scenario, + random_seed=random_seed, + year_labels=year_labels, + run_year_indices=run_year_indices, ) + assignments = mixed.build_assignments(assign_buildings) + mixed.materialize(path_output_dir=path_output_dir, assignments=assignments) + mixed.export_scenario_csv(path_output_dir=path_output_dir, assignments=assignments) - # 4. Load all upgrade metadata DataFrames now so applicability can be computed. - upgrade_dfs: dict[int, pl.DataFrame] = {0: baseline_df} - for uid in non_baseline_upgrades: - upgrade_dfs[uid] = pl.read_parquet( - _metadata_path(path_resstock_release, state_upper, uid) - ) - - # 5. Assign only eligible buildings to upgrades per run year. - # For each non-baseline upgrade, restrict the pool to buildings that actually - # received the upgrade in ResStock (postprocess_group.has_hp=True in that - # upgrade's metadata). 
This prevents assigning e.g. GSHP to ductless buildings - # or dual-fuel ASHP to electrically-heated buildings — those buildings have - # baseline loads in the upgrade data regardless of which upgrade they're placed in. - applicable_bldg_ids_per_upgrade: dict[int, set[int]] | None = None - if all(has_hp_col in upgrade_dfs[uid].columns for uid in non_baseline_upgrades): - applicable_bldg_ids_per_upgrade = {} - eligible_set = set(eligible_bldg_ids) - for uid in non_baseline_upgrades: - applicable = ( - set(upgrade_dfs[uid].filter(pl.col(has_hp_col))["bldg_id"].to_list()) - & eligible_set - ) - applicable_bldg_ids_per_upgrade[uid] = applicable - print( - f" upgrade {uid}: {len(applicable)} applicable eligible buildings " - f"(has_hp=True in upgrade metadata, not already HP in baseline)" - ) - else: - print( - " Warning: postprocess_group.has_hp missing from one or more upgrade " - "metadata files; falling back to unrestricted pool for all upgrades." - ) - - eligible_assignments_by_year = assign_buildings( - eligible_bldg_ids, - scenario, - run_year_indices, - random_seed, - applicable_bldg_ids_per_upgrade=applicable_bldg_ids_per_upgrade, - ) - - # Merge already-HP buildings back in (pinned to upgrade 0 in all years). - already_hp_baseline: dict[int, int] = {bid: 0 for bid in already_hp_bldg_ids} - assignments_by_year: dict[int, dict[int, int]] = { - t: {**eligible_assignments_by_year[t], **already_hp_baseline} - for t in run_year_indices - } - - path_output_dir.mkdir(parents=True, exist_ok=True) - - all_year_data: list[tuple[int, dict[int, int]]] = [] - - # 6. For each run year, write materialized metadata and load symlinks. - for t in run_year_indices: - calendar_year = year_labels[t] - year_dir = path_output_dir / f"year={calendar_year}" - year_dir.mkdir(parents=True, exist_ok=True) - - assignments = assignments_by_year[t] - - # Group buildings by their assigned upgrade for this year. 
- bldgs_by_upgrade: dict[int, list[int]] = {u: [] for u in all_upgrades} - for bldg_id, upgrade_id in assignments.items(): - bldgs_by_upgrade[upgrade_id].append(bldg_id) - - # Combine metadata from each upgrade, filtering to its assigned buildings. - parts: list[pl.DataFrame] = [] - for uid in all_upgrades: - bldg_ids_for_upgrade = bldgs_by_upgrade[uid] - if not bldg_ids_for_upgrade: - continue - df = upgrade_dfs[uid].filter(pl.col("bldg_id").is_in(bldg_ids_for_upgrade)) - parts.append(df) - - combined = pl.concat(parts) - combined.write_parquet(year_dir / "metadata-sb.parquet") - - # Create loads/ directory with symlinks per building. - loads_out_dir = year_dir / "loads" - loads_out_dir.mkdir(exist_ok=True) - - for uid in all_upgrades: - bldg_ids_for_upgrade = bldgs_by_upgrade[uid] - if not bldg_ids_for_upgrade: - continue - src_loads_dir = _loads_dir(path_resstock_release, state_upper, uid) - bldg_ids_set = set(bldg_ids_for_upgrade) - load_map = _build_load_file_map(src_loads_dir, bldg_ids_set) - - for bldg_id in bldg_ids_for_upgrade: - src_file = load_map.get(bldg_id) - if src_file is None: - raise FileNotFoundError( - f"No load file found for bldg_id={bldg_id} in {src_loads_dir}" - ) - dst = loads_out_dir / src_file.name - if dst.is_symlink() or dst.exists(): - dst.unlink() - os.symlink(src_file.resolve(), dst) - - n_assigned = sum(len(v) for v in bldgs_by_upgrade.values()) - n_hp = n_assigned - len(bldgs_by_upgrade[0]) - print( - f" year={calendar_year}: {n_assigned} buildings " - f"({n_hp} HP-upgraded, {len(bldgs_by_upgrade[0])} baseline)" - ) - all_year_data.append((calendar_year, assignments)) - - # 7. Write scenario CSV (bldg_id, year_, ...) for reference. 
- csv_path = path_output_dir / "scenario_assignments.csv" - with open(csv_path, "w", newline="", encoding="utf-8") as f: - writer = csv.writer(f) - header = ["bldg_id"] + [f"year_{yr}" for yr, _ in all_year_data] - writer.writerow(header) - for bldg_id in sorted(all_bldg_ids): - row: list[object] = [bldg_id] + [asgn[bldg_id] for _, asgn in all_year_data] - writer.writerow(row) - - print(f"Wrote scenario assignments to {csv_path}") print(f"Done. Materialised {len(run_year_indices)} year(s) to {path_output_dir}") From be5c73d2726e40845c51860275b49f90dc8c071e Mon Sep 17 00:00:00 2001 From: sherryzuo <145484385+sherryzuo@users.noreply.github.com> Date: Mon, 23 Mar 2026 21:04:17 +0000 Subject: [PATCH 17/19] Fix parquet loading and Cambium dist MC handling - patches: use ParquetFile for timestamp read; omit schema for mixed-upgrade symlinked parquets (dictionary vs plain encodings vary) - generate_utility_tx_dx_mc: fix leap-year 8760 rule (use calendar.isleap, not data presence); align timestamp dtype for join - Add test_cambium_dist_mc: busbar_load rename, empty bulk TX path --- tests/test_cambium_dist_mc.py | 171 ++++++++++++++++++ utils/mid/patches.py | 15 +- .../generate_utility_tx_dx_mc.py | 18 +- 3 files changed, 193 insertions(+), 11 deletions(-) create mode 100644 tests/test_cambium_dist_mc.py diff --git a/tests/test_cambium_dist_mc.py b/tests/test_cambium_dist_mc.py new file mode 100644 index 00000000..8a18eb28 --- /dev/null +++ b/tests/test_cambium_dist_mc.py @@ -0,0 +1,171 @@ +"""Tests for Cambium busbar_load dist MC loading and empty bulk TX handling. 
+ +Covers: +- load_cambium_load_profile: busbar_load → load_mw rename, utility column +- Missing required columns raise a clear error +- add_bulk_tx_and_dist_and_sub_tx_marginal_cost with empty/None path_bulk_tx_mc + returns dist-only MC (no bulk TX added) +""" + +from __future__ import annotations + +from datetime import datetime, timezone +from pathlib import Path + +import pandas as pd +import polars as pl +import pytest + +from utils.cairo import add_bulk_tx_and_dist_and_sub_tx_marginal_cost +from utils.pre.marginal_costs.generate_utility_tx_dx_mc import ( + load_cambium_load_profile, +) + + +def _make_cambium_parquet(tmp_path: Path, *, rows: int = 8760) -> Path: + """Write a minimal Cambium-style parquet with timestamp + busbar_load.""" + timestamps = pl.datetime_range( + datetime(2025, 1, 1, 0, 0, 0), + datetime(2025, 12, 31, 23, 0, 0), + interval="1h", + eager=True, + )[:rows] + df = pl.DataFrame( + { + "timestamp": timestamps, + "busbar_load": [1000.0 + i * 0.1 for i in range(rows)], + "energy_cost_enduse": [0.05] * rows, + } + ) + path = tmp_path / "cambium.parquet" + df.write_parquet(path) + return path + + +def _make_dist_mc_parquet(tmp_path: Path, *, rows: int = 8760) -> Path: + """Write a minimal dist+sub-tx MC parquet in the format cairo expects. + + The loader (load_dist_and_sub_tx_marginal_costs) expects columns: + timestamp, mc_total_per_kwh (and optionally mc_upstream_per_kwh, mc_dist_per_kwh). + Timestamps must be tz-naive (the loader tz-localizes to EST). 
+ """ + timestamps = pd.date_range("2025-01-01", periods=rows, freq="h") + df = pd.DataFrame( + { + "timestamp": timestamps, + "mc_total_per_kwh": [0.001] * rows, + "mc_upstream_per_kwh": [0.0005] * rows, + "mc_dist_per_kwh": [0.0005] * rows, + } + ) + path = tmp_path / "dist_mc.parquet" + df.to_parquet(path, index=False) + return path + + +class TestLoadCambiumLoadProfile: + def test_renames_busbar_load_to_load_mw(self, tmp_path: Path) -> None: + path = _make_cambium_parquet(tmp_path) + result = load_cambium_load_profile(str(path), "nyseg", {}) + assert "load_mw" in result.columns + assert "busbar_load" not in result.columns + + def test_adds_utility_column(self, tmp_path: Path) -> None: + path = _make_cambium_parquet(tmp_path) + result = load_cambium_load_profile(str(path), "nyseg", {}) + assert "utility" in result.columns + assert result["utility"].unique().to_list() == ["nyseg"] + + def test_preserves_row_count(self, tmp_path: Path) -> None: + path = _make_cambium_parquet(tmp_path) + result = load_cambium_load_profile(str(path), "nyseg", {}) + assert len(result) == 8760 + + def test_timestamp_column_preserved(self, tmp_path: Path) -> None: + path = _make_cambium_parquet(tmp_path) + result = load_cambium_load_profile(str(path), "nyseg", {}) + assert "timestamp" in result.columns + + def test_missing_busbar_load_raises(self, tmp_path: Path) -> None: + df = pl.DataFrame( + { + "timestamp": [datetime(2025, 1, 1)] * 10, + "energy_cost_enduse": [0.05] * 10, + } + ) + path = tmp_path / "bad.parquet" + df.write_parquet(path) + with pytest.raises(ValueError, match="busbar_load"): + load_cambium_load_profile(str(path), "nyseg", {}) + + def test_missing_timestamp_raises(self, tmp_path: Path) -> None: + df = pl.DataFrame({"busbar_load": [1000.0] * 10}) + path = tmp_path / "bad.parquet" + df.write_parquet(path) + with pytest.raises(ValueError, match="timestamp"): + load_cambium_load_profile(str(path), "nyseg", {}) + + def test_load_mw_values_match_busbar_load(self, 
tmp_path: Path) -> None: + path = _make_cambium_parquet(tmp_path) + result = load_cambium_load_profile(str(path), "nyseg", {}) + expected = [1000.0 + i * 0.1 for i in range(8760)] + actual = result["load_mw"].to_list() + assert actual == pytest.approx(expected, rel=1e-6) + + +class TestAddBulkTxEmptyPath: + """add_bulk_tx_and_dist_and_sub_tx_marginal_cost with empty/None bulk TX path + returns dist-only MC (no bulk TX contribution).""" + + def _make_target_index(self) -> pd.DatetimeIndex: + return pd.date_range("2025-01-01", periods=8760, freq="h", tz="EST") + + def test_empty_string_path_skips_bulk_tx(self, tmp_path: Path) -> None: + path_dist = _make_dist_mc_parquet(tmp_path) + target_idx = self._make_target_index() + + result = add_bulk_tx_and_dist_and_sub_tx_marginal_cost( + path_dist_and_sub_tx_mc=path_dist, + path_bulk_tx_mc="", + target_index=target_idx, + ) + + assert result is not None + assert len(result) == 8760 + assert result.sum() == pytest.approx(0.001 * 8760, rel=1e-4) + + def test_none_path_skips_bulk_tx(self, tmp_path: Path) -> None: + path_dist = _make_dist_mc_parquet(tmp_path) + target_idx = self._make_target_index() + + result = add_bulk_tx_and_dist_and_sub_tx_marginal_cost( + path_dist_and_sub_tx_mc=path_dist, + path_bulk_tx_mc=None, + target_index=target_idx, + ) + + assert result.sum() == pytest.approx(0.001 * 8760, rel=1e-4) + + def test_whitespace_only_path_skips_bulk_tx(self, tmp_path: Path) -> None: + path_dist = _make_dist_mc_parquet(tmp_path) + target_idx = self._make_target_index() + + result = add_bulk_tx_and_dist_and_sub_tx_marginal_cost( + path_dist_and_sub_tx_mc=path_dist, + path_bulk_tx_mc=" ", + target_index=target_idx, + ) + + assert result.sum() == pytest.approx(0.001 * 8760, rel=1e-4) + + def test_result_name_is_delivery_mc(self, tmp_path: Path) -> None: + path_dist = _make_dist_mc_parquet(tmp_path) + target_idx = self._make_target_index() + + result = add_bulk_tx_and_dist_and_sub_tx_marginal_cost( + 
path_dist_and_sub_tx_mc=path_dist, + path_bulk_tx_mc=None, + target_index=target_idx, + ) + + assert result.name == "Marginal Distribution Costs ($/kWh)" diff --git a/utils/mid/patches.py b/utils/mid/patches.py index bab0e2b9..ab230947 100644 --- a/utils/mid/patches.py +++ b/utils/mid/patches.py @@ -94,12 +94,14 @@ def _return_loads_combined( # 2. Read timestamps from the first file only — all ResStock buildings share # the same 8760-hour series, so one file is sufficient. - first_table = pq.read_table(paths[0], columns=["timestamp"]) - ts_first = first_table.column("timestamp").to_numpy() + # Use ParquetFile to avoid dataset/Hive-partition inference that can + # fail when the parent path contains partition keys (e.g. year=2025). + pf = pq.ParquetFile(paths[0]) + ts_first = pf.read(columns=["timestamp"]).column("timestamp").to_numpy() + del pf assert len(ts_first) == 8760, ( f"Expected 8760 rows in first file, got {len(ts_first)}" ) - del first_table # 3. Compute timeshift parameters from source timestamps source_year = int(pd.Timestamp(ts_first[0]).year) @@ -118,14 +120,15 @@ def _return_loads_combined( unique_times = unique_times.tz_localize(force_tz) # 5. Read only the 3 data columns from all files (skip bldg_id and timestamp). - # Use the first file's schema — ResStock load files share identical schemas. - schema = pq.read_schema(paths[0]) + # Don't force a unified schema — mixed-upgrade symlinked files may have + # different encodings for columns we don't need (e.g. dictionary-encoded + # vs plain int32 for `year`). Omitting `schema` lets PyArrow unify per-file. 
_DATA_COLS = [ "out.electricity.total.energy_consumption", "out.electricity.pv.energy_consumption", "out.natural_gas.total.energy_consumption", ] - ds = pad.dataset(paths, format="parquet", schema=schema) + ds = pad.dataset(paths, format="parquet") table = ds.to_table(columns=_DATA_COLS) _log_mem("after to_table (3 data cols, arrow)") diff --git a/utils/pre/marginal_costs/generate_utility_tx_dx_mc.py b/utils/pre/marginal_costs/generate_utility_tx_dx_mc.py index ccdd6813..c1fe364f 100644 --- a/utils/pre/marginal_costs/generate_utility_tx_dx_mc.py +++ b/utils/pre/marginal_costs/generate_utility_tx_dx_mc.py @@ -39,6 +39,7 @@ """ import argparse +import calendar import io from datetime import datetime, timedelta from pathlib import Path @@ -196,13 +197,16 @@ def normalize_load_to_cairo_8760( if df.is_empty(): raise ValueError(f"No load rows found for load_year={year_load}") - # Cairo leap-year rule: if Feb 29 exists, drop Dec 31. - has_feb29 = df.select( + # Cairo leap-year rule: drop Dec 31 so the year has exactly 8760 hours. + # Check the calendar year (not just data) because some sources (e.g. Cambium) + # already omit Feb 29 for leap years, so the data won't contain it. + is_leap = calendar.isleap(year_load) + data_has_feb29 = df.select( ((pl.col("timestamp").dt.month() == 2) & (pl.col("timestamp").dt.day() == 29)) .any() .alias("has_feb29") ).item() - if has_feb29: + if data_has_feb29: print( " Leap-year pattern detected (Feb 29 present); dropping Dec 31 to match Cairo." ) @@ -224,7 +228,7 @@ def normalize_load_to_cairo_8760( agg_exprs.append(pl.col("utility").first().alias("utility")) df = df.group_by("timestamp").agg(agg_exprs).sort("timestamp") - # Build expected 8760 index for this year. + # Build expected 8760 index for this year (drop Dec 31 for leap years). 
start = datetime(year_load, 1, 1, 0, 0, 0) end = datetime(year_load, 12, 31, 23, 0, 0) expected = [] @@ -232,10 +236,14 @@ def normalize_load_to_cairo_8760( while cur <= end: expected.append(cur) cur += timedelta(hours=1) - if has_feb29: + if is_leap: expected = [t for t in expected if not (t.month == 12 and t.day == 31)] expected_df = pl.DataFrame({"timestamp": expected}) + # Align timestamp precision so the join key dtypes match. + if df["timestamp"].dtype != expected_df["timestamp"].dtype: + expected_df = expected_df.cast({"timestamp": df["timestamp"].dtype}) + # Reindex and fill any missing hours. df = expected_df.join(df, on="timestamp", how="left").sort("timestamp") missing_before_fill = df.filter(pl.col("load_mw").is_null()).height From ede147dcffc45abd1078b43318d210dce4a46079 Mon Sep 17 00:00:00 2001 From: sherryzuo <145484385+sherryzuo@users.noreply.github.com> Date: Mon, 23 Mar 2026 21:04:26 +0000 Subject: [PATCH 18/19] Extend adoption scenario YAML generation - generate_adoption_scenario_yamls: add --cambium-supply, --cambium-dist-mc-base, --adoption-tariff-dir, --residual-cost-frac; Cambium GEA/BA options - create_scenario_yamls: support adoption tariff dir - Extend tests for generate/create and scenario config --- .../test_generate_adoption_scenario_yamls.py | 170 ++++++++++++++++++ tests/test_create_scenario_yamls.py | 44 +++++ tests/test_scenario_config.py | 98 ++++++++++ utils/pre/create_scenario_yamls.py | 9 + utils/pre/generate_adoption_scenario_yamls.py | 55 +++++- 5 files changed, 374 insertions(+), 2 deletions(-) diff --git a/tests/pre/test_generate_adoption_scenario_yamls.py b/tests/pre/test_generate_adoption_scenario_yamls.py index 88c249dd..e977e8b4 100644 --- a/tests/pre/test_generate_adoption_scenario_yamls.py +++ b/tests/pre/test_generate_adoption_scenario_yamls.py @@ -433,3 +433,173 @@ def test_t_token_replaced_in_supply_mc_paths(self, tmp_path: Path) -> None: # key 201: year_index 1 (2030) - t= should be updated to t=2030 assert 
"t=2030" in result["runs"][201]["path_supply_energy_mc"] assert "t=2025" not in result["runs"][201]["path_supply_energy_mc"] + + +class TestHiveLoadsPath: + """path_resstock_loads is the hive-leaf upgrade=00/ dir, not a flat loads/ dir.""" + + def _run(self, tmp_path: Path) -> dict[str, Any]: + path_base, path_adopt, path_mat, path_out = _make_test_inputs(tmp_path) + main( + [ + "--base-scenario", + str(path_base), + "--runs", + "1", + "--adoption-config", + str(path_adopt), + "--materialized-dir", + str(path_mat), + "--output", + str(path_out), + ] + ) + return yaml.safe_load(path_out.read_text()) + + def test_loads_path_contains_load_curve_hourly(self, tmp_path: Path) -> None: + result = self._run(tmp_path) + for entry in result["runs"].values(): + assert "load_curve_hourly" in entry["path_resstock_loads"] + + def test_loads_path_contains_state_partition(self, tmp_path: Path) -> None: + result = self._run(tmp_path) + for entry in result["runs"].values(): + assert "state=NY" in entry["path_resstock_loads"] + + def test_loads_path_contains_upgrade_partition(self, tmp_path: Path) -> None: + result = self._run(tmp_path) + for entry in result["runs"].values(): + assert "upgrade=00" in entry["path_resstock_loads"] + + def test_loads_path_does_not_contain_flat_loads(self, tmp_path: Path) -> None: + result = self._run(tmp_path) + for entry in result["runs"].values(): + assert "/loads/" not in entry["path_resstock_loads"] + + def test_loads_path_year_matches_run_year(self, tmp_path: Path) -> None: + result = self._run(tmp_path) + assert "year=2025" in result["runs"][101]["path_resstock_loads"] + assert "year=2030" in result["runs"][201]["path_resstock_loads"] + + +# Base run with run_includes_subclasses=True for adoption-tariff-dir tests. 
+BASE_RUNS_SUBCLASS: dict[str, Any] = { + "runs": { + 5: { + "run_name": "ny_nyseg_run5_up00_precalc__hp_seasonal_vs_flat", + "state": "NY", + "utility": "nyseg", + "run_type": "precalc", + "upgrade": "0", + "path_resstock_metadata": "/old/metadata-sb.parquet", + "path_resstock_loads": "/old/loads/", + "path_dist_and_sub_tx_mc": "s3://dist/year=2025/data.parquet", + "path_supply_energy_mc": "s3://supply/year=2025/zero.parquet", + "path_supply_capacity_mc": "s3://supply/year=2025/zero.parquet", + "path_bulk_tx_mc": "", + "utility_revenue_requirement": None, + "run_includes_supply": False, + "run_includes_subclasses": True, + "year_run": 2025, + "path_tariffs_electric": { + "hp": "tariffs/electric/nyseg_hp_seasonal.json", + "non-hp": "tariffs/electric/nyseg_nonhp_flat.json", + }, + }, + } +} + + +class TestAdoptionTariffDir: + """--adoption-tariff-dir rewrites hp/non-hp paths for subclass runs only.""" + + def _run( + self, + tmp_path: Path, + adoption_tariff_dir: str | None = None, + runs: str = "5", + ) -> dict[str, Any]: + path_base = tmp_path / "scenarios_nyseg.yaml" + _write_yaml(path_base, BASE_RUNS_SUBCLASS) + path_adopt = tmp_path / "adoption.yaml" + _write_yaml(path_adopt, ADOPTION_CONFIG) + path_mat = tmp_path / "materialized" + for yr in [2025, 2030]: + (path_mat / f"year={yr}").mkdir(parents=True) + path_out = tmp_path / "out.yaml" + args = [ + "--base-scenario", + str(path_base), + "--runs", + runs, + "--adoption-config", + str(path_adopt), + "--materialized-dir", + str(path_mat), + "--output", + str(path_out), + ] + if adoption_tariff_dir is not None: + args += ["--adoption-tariff-dir", adoption_tariff_dir] + main(args) + return yaml.safe_load(path_out.read_text()) + + def test_hp_path_rewritten_with_tariff_dir(self, tmp_path: Path) -> None: + result = self._run(tmp_path, adoption_tariff_dir="/tariffs/adoption/cfg") + entry = result["runs"][105] + assert "/tariffs/adoption/cfg/year=2025" in entry["path_tariffs_electric"]["hp"] + assert 
"nyseg_hp_seasonal.json" in entry["path_tariffs_electric"]["hp"] + + def test_non_hp_path_rewritten_with_tariff_dir(self, tmp_path: Path) -> None: + result = self._run(tmp_path, adoption_tariff_dir="/tariffs/adoption/cfg") + entry = result["runs"][105] + assert ( + "/tariffs/adoption/cfg/year=2025" + in entry["path_tariffs_electric"]["non-hp"] + ) + assert "nyseg_nonhp_flat.json" in entry["path_tariffs_electric"]["non-hp"] + + def test_tariff_dir_includes_correct_year(self, tmp_path: Path) -> None: + result = self._run(tmp_path, adoption_tariff_dir="/tariffs/adoption/cfg") + entry_2030 = result["runs"][205] + assert "year=2030" in entry_2030["path_tariffs_electric"]["hp"] + assert "year=2025" not in entry_2030["path_tariffs_electric"]["hp"] + + def test_without_tariff_dir_paths_unchanged(self, tmp_path: Path) -> None: + result = self._run(tmp_path, adoption_tariff_dir=None) + entry = result["runs"][105] + assert ( + entry["path_tariffs_electric"]["hp"] + == "tariffs/electric/nyseg_hp_seasonal.json" + ) + assert ( + entry["path_tariffs_electric"]["non-hp"] + == "tariffs/electric/nyseg_nonhp_flat.json" + ) + + def test_non_subclass_run_not_affected(self, tmp_path: Path) -> None: + """Runs without run_includes_subclasses keep their original tariff paths.""" + path_base, path_adopt, path_mat, path_out = _make_test_inputs(tmp_path) + main( + [ + "--base-scenario", + str(path_base), + "--runs", + "1", + "--adoption-config", + str(path_adopt), + "--materialized-dir", + str(path_mat), + "--output", + str(path_out), + "--adoption-tariff-dir", + "/tariffs/adoption/cfg", + ] + ) + result = yaml.safe_load(path_out.read_text()) + # Run 1 uses path_tariffs_electric.all (not hp/non-hp); must be untouched. 
+ for entry in result["runs"].values(): + tariff_elec = entry.get("path_tariffs_electric", {}) + assert "hp" not in tariff_elec or "/tariffs/adoption/cfg" not in str( + tariff_elec.get("hp", "") + ) diff --git a/tests/test_create_scenario_yamls.py b/tests/test_create_scenario_yamls.py index 72a285e7..1aac8f46 100644 --- a/tests/test_create_scenario_yamls.py +++ b/tests/test_create_scenario_yamls.py @@ -74,3 +74,47 @@ def test_row_to_run_omits_path_tou_supply_mc_when_blank() -> None: run = _row_to_run(row, headers) assert "path_tou_supply_mc" not in run + + +def test_row_to_run_includes_residual_cost_frac_when_set() -> None: + """residual_cost_frac cell value is parsed as float and included.""" + row = _base_row() + row["residual_cost_frac"] = "0.0" + headers = list(row.keys()) + + run = _row_to_run(row, headers) + + assert run.get("residual_cost_frac") == 0.0 + + +def test_row_to_run_omits_residual_cost_frac_when_blank() -> None: + """Empty residual_cost_frac cell is omitted from output (existing runs unaffected).""" + row = _base_row() + row["residual_cost_frac"] = "" + headers = list(row.keys()) + + run = _row_to_run(row, headers) + + assert "residual_cost_frac" not in run + + +def test_row_to_run_omits_residual_cost_frac_when_column_absent() -> None: + """Column absence (older sheets) does not break parsing.""" + row = _base_row() + headers = [k for k in row if k != "residual_cost_frac"] + + run = _row_to_run(row, headers) + + assert "residual_cost_frac" not in run + + +def test_row_to_run_residual_cost_frac_invalid_raises() -> None: + """Non-numeric residual_cost_frac raises a clear error.""" + row = _base_row() + row["residual_cost_frac"] = "not-a-number" + headers = list(row.keys()) + + import pytest + + with pytest.raises(ValueError, match="residual_cost_frac"): + _row_to_run(row, headers) diff --git a/tests/test_scenario_config.py b/tests/test_scenario_config.py index 05f83b5e..083ceb30 100644 --- a/tests/test_scenario_config.py +++ 
b/tests/test_scenario_config.py @@ -7,6 +7,7 @@ from __future__ import annotations from pathlib import Path +from typing import Any import pytest import yaml @@ -193,3 +194,100 @@ def test_nested_subclass_format(self, tmp_path: Path) -> None: "hp_supply": 500.0, "nonhp_supply": 1000.0, } + + +class TestResidualCostFracYamlGuard: + """_build_settings_from_yaml_run: residual_cost_frac vs utility_revenue_requirement guard. + + Uses actual NYSEG scenario run 1 as the base (all file paths are real), then + overrides only the revenue-requirement-related fields to test the guard. + """ + + _SCENARIOS_NYSEG = ( + Path(__file__).resolve().parents[1] + / "rate_design" + / "hp_rates" + / "ny" + / "config" + / "scenarios" + / "scenarios_nyseg.yaml" + ) + + def _load_base_run(self) -> dict[str, Any]: + """Load run 1 from the real NYSEG scenarios YAML.""" + with self._SCENARIOS_NYSEG.open(encoding="utf-8") as f: + data = yaml.safe_load(f) + return dict(data["runs"][1]) + + def test_residual_cost_frac_and_urr_both_set_raises(self, tmp_path: Path) -> None: + """Setting both residual_cost_frac and utility_revenue_requirement raises.""" + from rate_design.hp_rates.run_scenario import _build_settings_from_yaml_run + + run = self._load_base_run() + run["residual_cost_frac"] = 0.0 + # utility_revenue_requirement already set in run 1; keep it → should raise + + with pytest.raises(ValueError, match="residual_cost_frac"): + _build_settings_from_yaml_run( + run=run, + run_num=1, + state="NY", + output_dir_override=tmp_path, + run_name_override="test", + ) + + def test_residual_cost_frac_with_urr_none_is_allowed(self, tmp_path: Path) -> None: + """residual_cost_frac=0.0 with utility_revenue_requirement: none is valid.""" + from rate_design.hp_rates.run_scenario import _build_settings_from_yaml_run + + run = self._load_base_run() + run["residual_cost_frac"] = 0.0 + run["utility_revenue_requirement"] = None + + settings = _build_settings_from_yaml_run( + run=run, + run_num=1, + state="NY", + 
output_dir_override=tmp_path, + run_name_override="test", + ) + + assert settings.residual_cost_frac == pytest.approx(0.0) + assert settings.rr_total == pytest.approx(0.0) + + def test_residual_cost_frac_with_urr_absent_is_allowed( + self, tmp_path: Path + ) -> None: + """residual_cost_frac=0.0 with utility_revenue_requirement absent is valid.""" + from rate_design.hp_rates.run_scenario import _build_settings_from_yaml_run + + run = self._load_base_run() + run["residual_cost_frac"] = 0.0 + run.pop("utility_revenue_requirement", None) + + settings = _build_settings_from_yaml_run( + run=run, + run_num=1, + state="NY", + output_dir_override=tmp_path, + run_name_override="test", + ) + + assert settings.residual_cost_frac == pytest.approx(0.0) + + def test_residual_cost_frac_absent_defaults_to_none(self, tmp_path: Path) -> None: + """When residual_cost_frac is not in the run dict, it defaults to None.""" + from rate_design.hp_rates.run_scenario import _build_settings_from_yaml_run + + run = self._load_base_run() + # residual_cost_frac absent; utility_revenue_requirement already set in run 1 + + settings = _build_settings_from_yaml_run( + run=run, + run_num=1, + state="NY", + output_dir_override=tmp_path, + run_name_override="test", + ) + + assert settings.residual_cost_frac is None diff --git a/utils/pre/create_scenario_yamls.py b/utils/pre/create_scenario_yamls.py index d6058daf..37e6e135 100644 --- a/utils/pre/create_scenario_yamls.py +++ b/utils/pre/create_scenario_yamls.py @@ -327,6 +327,15 @@ def parse_required_float(key: str) -> float: run["elasticity"] = parse_required_float("elasticity") + residual_cost_frac_raw = get_optional("residual_cost_frac") + if residual_cost_frac_raw: + try: + run["residual_cost_frac"] = float(residual_cost_frac_raw) + except ValueError as exc: + raise ValueError( + f"residual_cost_frac must be a float, got {residual_cost_frac_raw!r}" + ) from exc + return run diff --git a/utils/pre/generate_adoption_scenario_yamls.py 
b/utils/pre/generate_adoption_scenario_yamls.py index 86ad97dc..5379efbc 100644 --- a/utils/pre/generate_adoption_scenario_yamls.py +++ b/utils/pre/generate_adoption_scenario_yamls.py @@ -127,6 +127,20 @@ def build_parser() -> argparse.ArgumentParser: "{base}/utility={utility}/year={calendar_year}/data.parquet." ), ) + p.add_argument( + "--adoption-tariff-dir", + type=str, + default=None, + dest="adoption_tariff_dir", + help=( + "Base directory template for per-year adoption tariffs. When set, " + "for run entries with run_includes_subclasses=true (runs 5/6), " + "path_tariffs_electric.hp is rewritten to " + "/year=/_hp_seasonal.json and " + ".non-hp to /year=/_nonhp_flat.json. " + "The directory must exist before the run (created by run-adoption-all)." + ), + ) return p @@ -284,6 +298,8 @@ def main(argv: list[str] | None = None) -> None: print(f" Cambium supply MCs: gea={args.cambium_gea}, ba={args.cambium_ba}") if args.cambium_dist_mc_base: print(f" Cambium dist MC base: {args.cambium_dist_mc_base}") + if args.adoption_tariff_dir: + print(f" Adoption tariff dir: {args.adoption_tariff_dir}/year=/") # 3. Build generated run entries. output_runs: dict[int, dict[str, Any]] = {} @@ -292,8 +308,6 @@ def main(argv: list[str] | None = None) -> None: meta_path = str( path_materialized_dir / f"year={calendar_year}" / "metadata-sb.parquet" ) - loads_path = str(path_materialized_dir / f"year={calendar_year}" / "loads" / "") - for run_num in run_nums: base_run = base_runs[run_num] old_year_run = int(base_run.get("year_run", calendar_year)) @@ -301,6 +315,22 @@ def main(argv: list[str] | None = None) -> None: # Deep-copy so base configs remain unmodified. run_entry: dict[str, Any] = copy.deepcopy(base_run) + # Build the hive-leaf loads path for this run's state. 
+ # materialize() writes symlinks under: + # year=YYYY/load_curve_hourly/state=<state>/upgrade=00/ + # CAIRO's build_bldg_id_to_load_filepath does a flat glob("*.parquet") + # on path_resstock_loads, so it must point at the upgrade=00 leaf. + # scan_resstock_loads receives the year=YYYY/ base via the Justfile. + run_state: str = str(run_entry.get("state", "")).upper() + loads_path = str( + path_materialized_dir + / f"year={calendar_year}" + / "load_curve_hourly" + / f"state={run_state}" + / "upgrade=00" + / "" + ) + + # Replace ResStock data paths. + run_entry["path_resstock_metadata"] = meta_path + run_entry["path_resstock_loads"] = loads_path @@ -343,6 +373,27 @@ run_entry["residual_cost_frac"] = args.residual_cost_frac run_entry["utility_revenue_requirement"] = None + # Rewrite seasonal tariff paths for subclass runs (5/6) to + # point at per-year files written by run-adoption-all. + if args.adoption_tariff_dir and bool( + run_entry.get("run_includes_subclasses", False) + ): + utility_val = str(run_entry.get("utility", "")) + tariff_dir = ( + f"{args.adoption_tariff_dir.rstrip('/')}/year={calendar_year}" + ) + tariff_elec = run_entry.get("path_tariffs_electric", {}) + if isinstance(tariff_elec, dict): + if "hp" in tariff_elec: + tariff_elec["hp"] = ( + f"{tariff_dir}/{utility_val}_hp_seasonal.json" + ) + if "non-hp" in tariff_elec: + tariff_elec["non-hp"] = ( + f"{tariff_dir}/{utility_val}_nonhp_flat.json" + ) + run_entry["path_tariffs_electric"] = tariff_elec + output_key = (year_index + 1) * 100 + run_num output_runs[output_key] = run_entry print( From 4d090d466f42fadebfbb6f7d2f0356531e73b21d Mon Sep 17 00:00:00 2001 From: sherryzuo <145484385+sherryzuo@users.noreply.github.com> Date: Mon, 23 Mar 2026 21:04:33 +0000 Subject: [PATCH 19/19] Update Justfile for adoption pipeline; add NYSEG adoption scenarios - path_resstock_root, resstock_release_key; materialize-adoption uses root+release - generate-adoption-scenarios: Cambium
supply/dist, 0% residual, adoption tariff dir - run-adoption-all: precalc tariffs per year, seasonal derivation, Cambium dist MCs - Add scenarios_nyseg_adoption.yaml for NYSEG adoption runs --- rate_design/hp_rates/Justfile | 200 ++--- .../scenarios/scenarios_nyseg_adoption.yaml | 732 ++++++++++++++++++ 2 files changed, 841 insertions(+), 91 deletions(-) create mode 100644 rate_design/hp_rates/ny/config/scenarios/scenarios_nyseg_adoption.yaml diff --git a/rate_design/hp_rates/Justfile b/rate_design/hp_rates/Justfile index efcb2156..de433279 100644 --- a/rate_design/hp_rates/Justfile +++ b/rate_design/hp_rates/Justfile @@ -79,6 +79,8 @@ path_bulk_tx_mc := env_var_or_default('BULK_TX_MC', "") path_supply_energy_mc := env_var_or_default('SUPPLY_ENERGY_MC', "s3://data.sb/switchbox/marginal_costs/" + state + "/supply/energy/utility=" + utility + "/year=" + mc_year + "/data.parquet") path_supply_capacity_mc := env_var_or_default('SUPPLY_CAPACITY_MC', "s3://data.sb/switchbox/marginal_costs/" + state + "/supply/capacity/utility=" + utility + "/year=" + mc_year + "/data.parquet") path_supply_ancillary_mc := env_var_or_default('SUPPLY_ANCILLARY_MC', "") +path_resstock_root := "/ebs/data/nrel/resstock" +resstock_release_key := "res_2024_amy2018_2" path_resstock_release := "/ebs/data/nrel/resstock/res_2024_amy2018_2_sb" path_resstock_metadata := path_resstock_release + "/metadata" path_utility_assignment := path_resstock_release + "/metadata_utility" @@ -342,7 +344,8 @@ create-dist-and-sub-tx-mc-data-all: # Writes to the cambium_dist_and_sub_tx/ prefix to avoid overwriting EIA-based dist MCs. # # Example: -# just s ny create-dist-mc-cambium 2030 + +# just s ny create-dist-mc-cambium 2030 create-dist-mc-cambium year_mc=year: uv run python {{ path_repo }}/utils/pre/marginal_costs/generate_utility_tx_dx_mc.py \ --state {{ state_upper }} \ @@ -358,7 +361,8 @@ create-dist-mc-cambium year_mc=year: # Generate Cambium dist MCs for all adoption trajectory years (2025–2050). 
# # Example: -# just s ny create-cambium-dist-mc-all-years + +# just s ny create-cambium-dist-mc-all-years create-cambium-dist-mc-all-years: #!/usr/bin/env bash set -euo pipefail @@ -717,7 +721,8 @@ path_adoption_config_dir := path_config / "adoption" # adoption config YAML + a curve-fit diagnostic plot. # # Example: -# just s ny fit-adoption-config nyca_electrification + +# just s ny fit-adoption-config nyca_electrification fit-adoption-config config_name="nyca_electrification": uv run python {{ path_repo }}/utils/pre/fit_adoption_config.py \ --output "{{ path_adoption_config_dir }}/{{ config_name }}.yaml" \ @@ -729,34 +734,51 @@ fit-adoption-config config_name="nyca_electrification": # writes year=/ directories under the adoption output path. # # Example: -# just s ny materialize-adoption nyca_electrification + +# just s ny materialize-adoption nyca_electrification materialize-adoption config_name="default": uv run python {{ path_repo }}/utils/pre/materialize_mixed_upgrade.py \ --state "{{ state }}" \ --utility "{{ utility }}" \ --adoption-config "{{ path_adoption_config_dir }}/{{ config_name }}.yaml" \ - --path-resstock-release "{{ path_resstock_release }}" \ + --path-resstock-release "{{ path_resstock_root }}" \ + --release "{{ resstock_release_key }}" \ --output-dir "{{ path_resstock_release }}/adoption/{{ config_name }}" # Generate per-year scenario YAML entries for adoption runs. +# Uses Cambium supply MCs (energy_cost_enduse / capacity_cost_enduse), Cambium +# busbar_load dist MCs, and 0% residual cost so revenue requirement = total MC. # Output: config/scenarios/scenarios__adoption.yaml # +# When config_name is set, --adoption-tariff-dir is passed so that runs 5/6 +# in the YAML reference per-year seasonal tariff files instead of the shared +# static tariff. Those files are written by run-adoption-all between run-2 +# and run-5 for each year. 
+# # Example: -# just s ny generate-adoption-scenarios nyca_electrification 1,2,5,6 + +# just s ny generate-adoption-scenarios nyca_electrification 1,2,5,6 generate-adoption-scenarios config_name="default" runs="1,2,5,6": uv run python {{ path_repo }}/utils/pre/generate_adoption_scenario_yamls.py \ --base-scenario "{{ path_scenario_config }}" \ --runs "{{ runs }}" \ --adoption-config "{{ path_adoption_config_dir }}/{{ config_name }}.yaml" \ --materialized-dir "{{ path_resstock_release }}/adoption/{{ config_name }}" \ - --output "{{ path_scenarios }}/scenarios_{{ utility }}_adoption.yaml" + --output "{{ path_scenarios }}/scenarios_{{ utility }}_adoption.yaml" \ + --residual-cost-frac 0.0 \ + --cambium-supply \ + --cambium-gea NYISO \ + --cambium-ba {{ cambium_ba }} \ + --cambium-dist-mc-base "s3://data.sb/switchbox/marginal_costs/{{ state }}/cambium_dist_and_sub_tx" \ + --adoption-tariff-dir "{{ path_tariffs_electric }}/adoption/{{ config_name }}" # Run a single adoption scenario by (year-indexed) run number. # Run keys use the scheme (year_index + 1) * 100 + base_run_num, matching the # output of generate_adoption_scenario_yamls.py (e.g. 101, 102, 201, 202, ...). # # Example: -# just s ny run-adoption-scenario 101 + +# just s ny run-adoption-scenario 101 run-adoption-scenario run_num: #!/usr/bin/env bash set -euo pipefail @@ -773,102 +795,98 @@ run-adoption-scenario run_num: --output-dir "{{ path_outputs_base }}/${RDP_BATCH}" \ 2>&1 | tee "${log_file}" -# Orchestrate the full adoption pipeline: materialize → generate scenarios → run all. +# Orchestrate the full adoption pipeline: materialize → Cambium dist MCs → generate scenarios → run all. +# All runs use Cambium supply MCs, Cambium busbar_load dist MCs, and 0% residual cost. # Iterates over all (year × run) combinations using the key scheme # (year_index + 1) * 100 + base_run_num produced by generate_adoption_scenario_yamls.py. # +# For each year the loop: +# 1. 
Runs precalc-flat runs (1, 2) and copies their calibrated tariffs to +# tariffs/electric/adoption/<config_name>/year=<year>/. +# 2. Derives per-year seasonal tariffs from run-1/2 outputs + that year's +# mixed-upgrade loads (hive-partitioned under the materialized dir). +# 3. Runs seasonal-precalc runs (5, 6) and copies their calibrated tariffs. +# # Example: -# RDP_BATCH=ny_20260320_adoption just s ny run-adoption-all nyca_electrification 1,2 + +# RDP_BATCH=ny_20260320_adoption just s ny run-adoption-all nyca_electrification 1,2,5,6 run-adoption-all config_name="default" runs="1,2,5,6": #!/usr/bin/env bash set -euo pipefail : "${RDP_BATCH:?Set RDP_BATCH before running}" export RDP_BATCH just materialize-adoption "{{ config_name }}" + just create-cambium-dist-mc-all-years just generate-adoption-scenarios "{{ config_name }}" "{{ runs }}" - # Determine number of run years from the adoption config. - run_years_count=$(uv run python -c " -import yaml -with open('{{ path_adoption_config_dir }}/{{ config_name }}.yaml') as f: - cfg = yaml.safe_load(f) -years = cfg.get('run_years', cfg.get('year_labels', [])) -print(len(years)) -") - IFS=',' read -ra base_nums <<< "{{ runs }}" - for yi in $(seq 1 "$run_years_count"); do - for base_run in "${base_nums[@]}"; do - key=$((yi * 100 + base_run)) - echo ">> run-adoption-all: run-${key} (year_index=$((yi - 1)), base_run=${base_run})" >&2 + adoption_yaml="{{ path_scenarios }}/scenarios_{{ utility }}_adoption.yaml" + adoption_base="{{ path_resstock_release }}/adoption/{{ config_name }}" + tariffs_adoption_base="{{ path_tariffs_electric }}/adoption/{{ config_name }}" + # Read calendar years from the adoption config (one per line).
+ mapfile -t year_list < <(uv run python "{{ path_repo }}/utils/pre/list_adoption_years.py" \ + "{{ path_adoption_config_dir }}/{{ config_name }}.yaml") + IFS=',' read -ra all_runs <<< "{{ runs }}" + for yi in "${!year_list[@]}"; do + year="${year_list[$yi]}" + key_prefix=$(( (yi + 1) * 100 )) + tariff_dir="${tariffs_adoption_base}/year=${year}" + mkdir -p "${tariff_dir}" + loads_base="${adoption_base}/year=${year}" + echo ">> run-adoption-all: year=${year} (yi=${yi}, key_prefix=${key_prefix})" >&2 + # --- Runs 1 and 2: precalc flat --- + for base_run in "${all_runs[@]}"; do + [[ "${base_run}" == "1" || "${base_run}" == "2" ]] || continue + key=$(( key_prefix + base_run )) + echo ">> run-adoption-all: run-${key} (year=${year}, base_run=${base_run})" >&2 just run-adoption-scenario "${key}" + run_dir=$(bash "{{ latest_output }}" "${adoption_yaml}" "${key}") + just copy-calibrated-tariff-from-run "${run_dir}" "${tariff_dir}" done - done - -# Generate per-year Cambium scenario YAML entries for adoption runs. -# Uses Cambium busbar_load dist MCs and Cambium supply MCs; 0% residual cost. 
-# Output: config/scenarios/scenarios__adoption_cambium.yaml -# -# Example: -# just s ny generate-adoption-scenarios-cambium nyca_electrification 1,2,5,6 -generate-adoption-scenarios-cambium config_name="default" runs="1,2,5,6": - uv run python {{ path_repo }}/utils/pre/generate_adoption_scenario_yamls.py \ - --base-scenario "{{ path_scenario_config }}" \ - --runs "{{ runs }}" \ - --adoption-config "{{ path_adoption_config_dir }}/{{ config_name }}.yaml" \ - --materialized-dir "{{ path_resstock_release }}/adoption/{{ config_name }}" \ - --output "{{ path_scenarios }}/scenarios_{{ utility }}_adoption_cambium.yaml" \ - --residual-cost-frac 0.0 \ - --cambium-supply \ - --cambium-gea NYISO \ - --cambium-ba {{ cambium_ba }} \ - --cambium-dist-mc-base "s3://data.sb/switchbox/marginal_costs/{{ state }}/cambium_dist_and_sub_tx" - -# Run a single Cambium adoption scenario by (year-indexed) run number. -# Uses scenarios__adoption_cambium.yaml generated by generate-adoption-scenarios-cambium. -# -# Example: -# just s ny run-adoption-scenario-cambium 101 -run-adoption-scenario-cambium run_num: - #!/usr/bin/env bash - set -euo pipefail - : "${RDP_BATCH:?Set RDP_BATCH before running}" - export RDP_BATCH - log_dir="${HOME}/rdp_run_logs" - mkdir -p "${log_dir}" - log_file="${log_dir}/{{ utility }}_adoption_cambium_run{{ run_num }}_${RDP_BATCH}.log" - echo ">> run-adoption-scenario-cambium {{ run_num }}: logging to ${log_file}" >&2 - uv run python {{ path_repo }}/rate_design/hp_rates/run_scenario.py \ - --state "{{ state }}" \ - --scenario-config "{{ path_scenarios }}/scenarios_{{ utility }}_adoption_cambium.yaml" \ - --run-num "{{ run_num }}" \ - --output-dir "{{ path_outputs_base }}/${RDP_BATCH}" \ - 2>&1 | tee "${log_file}" - -# Full Cambium adoption pipeline: materialize → dist MCs → scenarios → run all. 
-# -# Example: -# RDP_BATCH=ny_20260320_adoption just s ny run-adoption-all-cambium nyca_electrification 1,2,5,6 -run-adoption-all-cambium config_name="default" runs="1,2,5,6": - #!/usr/bin/env bash - set -euo pipefail - : "${RDP_BATCH:?Set RDP_BATCH before running}" - export RDP_BATCH - just materialize-adoption "{{ config_name }}" - just create-cambium-dist-mc-all-years - just generate-adoption-scenarios-cambium "{{ config_name }}" "{{ runs }}" - # Determine number of run years from the adoption config. - run_years_count=$(uv run python -c " -import yaml -with open('{{ path_adoption_config_dir }}/{{ config_name }}.yaml') as f: - cfg = yaml.safe_load(f) -years = cfg.get('run_years', cfg.get('year_labels', [])) -print(len(years)) -") - IFS=',' read -ra base_nums <<< "{{ runs }}" - for yi in $(seq 1 "$run_years_count"); do - for base_run in "${base_nums[@]}"; do - key=$((yi * 100 + base_run)) - echo ">> run-adoption-all-cambium: run-${key} (year_index=$((yi - 1)), base_run=${base_run})" >&2 - just run-adoption-scenario-cambium "${key}" + # --- Derive per-year seasonal tariffs (only when runs 1,2 and 5 or 6 are present) --- + has_run1=false; has_run2=false; has_run5=false; has_run6=false + for r in "${all_runs[@]}"; do + [[ "$r" == "1" ]] && has_run1=true + [[ "$r" == "2" ]] && has_run2=true + [[ "$r" == "5" ]] && has_run5=true + [[ "$r" == "6" ]] && has_run6=true + done + if $has_run1 && $has_run5; then + run1_dir=$(bash "{{ latest_output }}" "${adoption_yaml}" $(( key_prefix + 1 ))) + echo ">> run-adoption-all: deriving seasonal (delivery) from ${run1_dir}" >&2 + just compute-seasonal-discount-inputs \ + "${run1_dir}" "${loads_base}" "{{ state_upper }}" "{{ upgrade }}" + just create-seasonal-discount-tariff \ + "${tariff_dir}/{{ utility }}_flat_calibrated.json" \ + "${run1_dir}/seasonal_discount_rate_inputs.csv" \ + "{{ utility }}_hp_seasonal" \ + "${tariff_dir}/{{ utility }}_hp_seasonal.json" + fi + if $has_run2 && $has_run6; then + run2_dir=$(bash "{{ 
latest_output }}" "${adoption_yaml}" $(( key_prefix + 2 ))) + echo ">> run-adoption-all: deriving seasonal (supply) from ${run2_dir}" >&2 + just compute-seasonal-discount-inputs \ + "${run2_dir}" "${loads_base}" "{{ state_upper }}" "{{ upgrade }}" + just create-seasonal-discount-tariff \ + "${tariff_dir}/{{ utility }}_flat_supply_calibrated.json" \ + "${run2_dir}/seasonal_discount_rate_inputs.csv" \ + "{{ utility }}_hp_seasonal_supply" \ + "${tariff_dir}/{{ utility }}_hp_seasonal_supply.json" + fi + # --- Runs 5 and 6: precalc seasonal --- + for base_run in "${all_runs[@]}"; do + [[ "${base_run}" == "5" || "${base_run}" == "6" ]] || continue + key=$(( key_prefix + base_run )) + echo ">> run-adoption-all: run-${key} (year=${year}, base_run=${base_run})" >&2 + just run-adoption-scenario "${key}" + run_dir=$(bash "{{ latest_output }}" "${adoption_yaml}" "${key}") + just copy-calibrated-tariff-from-run "${run_dir}" "${tariff_dir}" + done + # --- Any remaining runs (not 1,2,5,6) --- + for base_run in "${all_runs[@]}"; do + [[ "${base_run}" == "1" || "${base_run}" == "2" ]] && continue + [[ "${base_run}" == "5" || "${base_run}" == "6" ]] && continue + key=$(( key_prefix + base_run )) + echo ">> run-adoption-all: run-${key} (year=${year}, base_run=${base_run})" >&2 + just run-adoption-scenario "${key}" done done diff --git a/rate_design/hp_rates/ny/config/scenarios/scenarios_nyseg_adoption.yaml b/rate_design/hp_rates/ny/config/scenarios/scenarios_nyseg_adoption.yaml new file mode 100644 index 00000000..dac336e5 --- /dev/null +++ b/rate_design/hp_rates/ny/config/scenarios/scenarios_nyseg_adoption.yaml @@ -0,0 +1,732 @@ +runs: + 101: + run_name: ny_nyseg_run1_up00_precalc_y2025_mixed__flat + state: NY + utility: nyseg + run_type: precalc + upgrade: '0' + path_tariff_maps_electric: tariff_maps/electric/nyseg_flat.csv + path_tariff_maps_gas: tariff_maps/gas/nyseg_u00.csv + path_resstock_metadata: 
/ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2025/metadata-sb.parquet + path_resstock_loads: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2025/load_curve_hourly/state=NY/upgrade=00 + path_dist_and_sub_tx_mc: s3://data.sb/switchbox/marginal_costs/ny/cambium_dist_and_sub_tx/utility=nyseg/year=2025/data.parquet + path_utility_assignment: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/metadata_utility/state=NY/utility_assignment.parquet + path_tariffs_gas: tariffs/gas + path_outputs: /data.sb/switchbox/cairo/outputs/hp_rates/ny/nyseg//ny_nyseg_run1_up00_precalc__flat + path_supply_energy_mc: s3://data.sb/switchbox/marginal_costs/ny/supply/energy/utility=nyseg/year=2025/zero.parquet + path_supply_capacity_mc: s3://data.sb/switchbox/marginal_costs/ny/supply/capacity/utility=nyseg/year=2025/zero.parquet + path_tariffs_electric: + all: tariffs/electric/nyseg_flat.json + utility_revenue_requirement: null + run_includes_supply: false + run_includes_subclasses: false + path_electric_utility_stats: s3://data.sb/eia/861/electric_utility_stats/year=2024/state=NY/data.parquet + path_bulk_tx_mc: '' + solar_pv_compensation: net_metering + year_run: 2025 + year_dollar_conversion: 2025 + process_workers: 8 + elasticity: 0.0 + residual_cost_frac: 0.0 + + 102: + run_name: ny_nyseg_run2_up00_precalc_supply_y2025_mixed__flat + state: NY + utility: nyseg + run_type: precalc + upgrade: '0' + path_tariff_maps_electric: tariff_maps/electric/nyseg_flat_supply.csv + path_tariff_maps_gas: tariff_maps/gas/nyseg_u00.csv + path_resstock_metadata: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2025/metadata-sb.parquet + path_resstock_loads: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2025/load_curve_hourly/state=NY/upgrade=00 + path_dist_and_sub_tx_mc: s3://data.sb/switchbox/marginal_costs/ny/cambium_dist_and_sub_tx/utility=nyseg/year=2025/data.parquet + 
path_utility_assignment: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/metadata_utility/state=NY/utility_assignment.parquet + path_tariffs_gas: tariffs/gas + path_outputs: /data.sb/switchbox/cairo/outputs/hp_rates/ny/nyseg//ny_nyseg_run2_up00_precalc_supply__flat + path_supply_energy_mc: s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2025/gea=NYISO/r=p127/data.parquet + path_supply_capacity_mc: s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2025/gea=NYISO/r=p127/data.parquet + path_tariffs_electric: + all: tariffs/electric/nyseg_flat_supply.json + utility_revenue_requirement: null + run_includes_supply: true + run_includes_subclasses: false + path_electric_utility_stats: s3://data.sb/eia/861/electric_utility_stats/year=2024/state=NY/data.parquet + path_bulk_tx_mc: '' + solar_pv_compensation: net_metering + year_run: 2025 + year_dollar_conversion: 2025 + process_workers: 8 + elasticity: 0.0 + residual_cost_frac: 0.0 + + 105: + run_name: ny_nyseg_run5_up00_precalc_y2025_mixed__hp_seasonal_vs_flat + state: NY + utility: nyseg + run_type: precalc + upgrade: '0' + path_tariff_maps_electric: tariff_maps/electric/nyseg_hp_seasonal_vs_flat.csv + path_tariff_maps_gas: tariff_maps/gas/nyseg_u00.csv + path_resstock_metadata: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2025/metadata-sb.parquet + path_resstock_loads: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2025/load_curve_hourly/state=NY/upgrade=00 + path_dist_and_sub_tx_mc: s3://data.sb/switchbox/marginal_costs/ny/cambium_dist_and_sub_tx/utility=nyseg/year=2025/data.parquet + path_utility_assignment: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/metadata_utility/state=NY/utility_assignment.parquet + path_tariffs_gas: tariffs/gas + path_outputs: /data.sb/switchbox/cairo/outputs/hp_rates/ny/nyseg//ny_nyseg_run5_up00_precalc__hp_seasonal_vs_flat + path_supply_energy_mc: 
s3://data.sb/switchbox/marginal_costs/ny/supply/energy/utility=nyseg/year=2025/zero.parquet + path_supply_capacity_mc: s3://data.sb/switchbox/marginal_costs/ny/supply/capacity/utility=nyseg/year=2025/zero.parquet + path_tariffs_electric: + hp: /ebs/home/sherry_switch_box/rate-design-platform/rate_design/hp_rates/ny/config/tariffs/electric/adoption/nyca_electrification/year=2025/nyseg_hp_seasonal.json + non-hp: /ebs/home/sherry_switch_box/rate-design-platform/rate_design/hp_rates/ny/config/tariffs/electric/adoption/nyca_electrification/year=2025/nyseg_nonhp_flat.json + utility_revenue_requirement: null + run_includes_supply: false + run_includes_subclasses: true + path_electric_utility_stats: s3://data.sb/eia/861/electric_utility_stats/year=2024/state=NY/data.parquet + path_bulk_tx_mc: '' + solar_pv_compensation: net_metering + year_run: 2025 + year_dollar_conversion: 2025 + process_workers: 8 + elasticity: 0.0 + residual_cost_frac: 0.0 + + 106: + run_name: ny_nyseg_run6_up00_precalc_supply_y2025_mixed__hp_seasonal_vs_flat + state: NY + utility: nyseg + run_type: precalc + upgrade: '0' + path_tariff_maps_electric: tariff_maps/electric/nyseg_hp_seasonal_vs_flat_supply.csv + path_tariff_maps_gas: tariff_maps/gas/nyseg_u00.csv + path_resstock_metadata: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2025/metadata-sb.parquet + path_resstock_loads: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2025/load_curve_hourly/state=NY/upgrade=00 + path_dist_and_sub_tx_mc: s3://data.sb/switchbox/marginal_costs/ny/cambium_dist_and_sub_tx/utility=nyseg/year=2025/data.parquet + path_utility_assignment: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/metadata_utility/state=NY/utility_assignment.parquet + path_tariffs_gas: tariffs/gas + path_outputs: /data.sb/switchbox/cairo/outputs/hp_rates/ny/nyseg//ny_nyseg_run6_up00_precalc_supply__hp_seasonal_vs_flat + path_supply_energy_mc: 
s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2025/gea=NYISO/r=p127/data.parquet + path_supply_capacity_mc: s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2025/gea=NYISO/r=p127/data.parquet + path_tariffs_electric: + hp: /ebs/home/sherry_switch_box/rate-design-platform/rate_design/hp_rates/ny/config/tariffs/electric/adoption/nyca_electrification/year=2025/nyseg_hp_seasonal.json + non-hp: /ebs/home/sherry_switch_box/rate-design-platform/rate_design/hp_rates/ny/config/tariffs/electric/adoption/nyca_electrification/year=2025/nyseg_nonhp_flat.json + utility_revenue_requirement: null + run_includes_supply: true + run_includes_subclasses: true + path_electric_utility_stats: s3://data.sb/eia/861/electric_utility_stats/year=2024/state=NY/data.parquet + path_bulk_tx_mc: '' + solar_pv_compensation: net_metering + year_run: 2025 + year_dollar_conversion: 2025 + process_workers: 8 + elasticity: 0.0 + residual_cost_frac: 0.0 + + 201: + run_name: ny_nyseg_run1_up00_precalc_y2030_mixed__flat + state: NY + utility: nyseg + run_type: precalc + upgrade: '0' + path_tariff_maps_electric: tariff_maps/electric/nyseg_flat.csv + path_tariff_maps_gas: tariff_maps/gas/nyseg_u00.csv + path_resstock_metadata: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2030/metadata-sb.parquet + path_resstock_loads: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2030/load_curve_hourly/state=NY/upgrade=00 + path_dist_and_sub_tx_mc: s3://data.sb/switchbox/marginal_costs/ny/cambium_dist_and_sub_tx/utility=nyseg/year=2030/data.parquet + path_utility_assignment: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/metadata_utility/state=NY/utility_assignment.parquet + path_tariffs_gas: tariffs/gas + path_outputs: /data.sb/switchbox/cairo/outputs/hp_rates/ny/nyseg//ny_nyseg_run1_up00_precalc__flat + path_supply_energy_mc: s3://data.sb/switchbox/marginal_costs/ny/supply/energy/utility=nyseg/year=2030/zero.parquet + path_supply_capacity_mc: 
s3://data.sb/switchbox/marginal_costs/ny/supply/capacity/utility=nyseg/year=2030/zero.parquet + path_tariffs_electric: + all: tariffs/electric/nyseg_flat.json + utility_revenue_requirement: null + run_includes_supply: false + run_includes_subclasses: false + path_electric_utility_stats: s3://data.sb/eia/861/electric_utility_stats/year=2024/state=NY/data.parquet + path_bulk_tx_mc: '' + solar_pv_compensation: net_metering + year_run: 2030 + year_dollar_conversion: 2025 + process_workers: 8 + elasticity: 0.0 + residual_cost_frac: 0.0 + + 202: + run_name: ny_nyseg_run2_up00_precalc_supply_y2030_mixed__flat + state: NY + utility: nyseg + run_type: precalc + upgrade: '0' + path_tariff_maps_electric: tariff_maps/electric/nyseg_flat_supply.csv + path_tariff_maps_gas: tariff_maps/gas/nyseg_u00.csv + path_resstock_metadata: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2030/metadata-sb.parquet + path_resstock_loads: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2030/load_curve_hourly/state=NY/upgrade=00 + path_dist_and_sub_tx_mc: s3://data.sb/switchbox/marginal_costs/ny/cambium_dist_and_sub_tx/utility=nyseg/year=2030/data.parquet + path_utility_assignment: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/metadata_utility/state=NY/utility_assignment.parquet + path_tariffs_gas: tariffs/gas + path_outputs: /data.sb/switchbox/cairo/outputs/hp_rates/ny/nyseg//ny_nyseg_run2_up00_precalc_supply__flat + path_supply_energy_mc: s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2030/gea=NYISO/r=p127/data.parquet + path_supply_capacity_mc: s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2030/gea=NYISO/r=p127/data.parquet + path_tariffs_electric: + all: tariffs/electric/nyseg_flat_supply.json + utility_revenue_requirement: null + run_includes_supply: true + run_includes_subclasses: false + path_electric_utility_stats: s3://data.sb/eia/861/electric_utility_stats/year=2024/state=NY/data.parquet + path_bulk_tx_mc: '' + 
solar_pv_compensation: net_metering + year_run: 2030 + year_dollar_conversion: 2025 + process_workers: 8 + elasticity: 0.0 + residual_cost_frac: 0.0 + + 205: + run_name: ny_nyseg_run5_up00_precalc_y2030_mixed__hp_seasonal_vs_flat + state: NY + utility: nyseg + run_type: precalc + upgrade: '0' + path_tariff_maps_electric: tariff_maps/electric/nyseg_hp_seasonal_vs_flat.csv + path_tariff_maps_gas: tariff_maps/gas/nyseg_u00.csv + path_resstock_metadata: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2030/metadata-sb.parquet + path_resstock_loads: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2030/load_curve_hourly/state=NY/upgrade=00 + path_dist_and_sub_tx_mc: s3://data.sb/switchbox/marginal_costs/ny/cambium_dist_and_sub_tx/utility=nyseg/year=2030/data.parquet + path_utility_assignment: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/metadata_utility/state=NY/utility_assignment.parquet + path_tariffs_gas: tariffs/gas + path_outputs: /data.sb/switchbox/cairo/outputs/hp_rates/ny/nyseg//ny_nyseg_run5_up00_precalc__hp_seasonal_vs_flat + path_supply_energy_mc: s3://data.sb/switchbox/marginal_costs/ny/supply/energy/utility=nyseg/year=2030/zero.parquet + path_supply_capacity_mc: s3://data.sb/switchbox/marginal_costs/ny/supply/capacity/utility=nyseg/year=2030/zero.parquet + path_tariffs_electric: + hp: /ebs/home/sherry_switch_box/rate-design-platform/rate_design/hp_rates/ny/config/tariffs/electric/adoption/nyca_electrification/year=2030/nyseg_hp_seasonal.json + non-hp: /ebs/home/sherry_switch_box/rate-design-platform/rate_design/hp_rates/ny/config/tariffs/electric/adoption/nyca_electrification/year=2030/nyseg_nonhp_flat.json + utility_revenue_requirement: null + run_includes_supply: false + run_includes_subclasses: true + path_electric_utility_stats: s3://data.sb/eia/861/electric_utility_stats/year=2024/state=NY/data.parquet + path_bulk_tx_mc: '' + solar_pv_compensation: net_metering + year_run: 2030 + 
year_dollar_conversion: 2025 + process_workers: 8 + elasticity: 0.0 + residual_cost_frac: 0.0 + + 206: + run_name: ny_nyseg_run6_up00_precalc_supply_y2030_mixed__hp_seasonal_vs_flat + state: NY + utility: nyseg + run_type: precalc + upgrade: '0' + path_tariff_maps_electric: tariff_maps/electric/nyseg_hp_seasonal_vs_flat_supply.csv + path_tariff_maps_gas: tariff_maps/gas/nyseg_u00.csv + path_resstock_metadata: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2030/metadata-sb.parquet + path_resstock_loads: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2030/load_curve_hourly/state=NY/upgrade=00 + path_dist_and_sub_tx_mc: s3://data.sb/switchbox/marginal_costs/ny/cambium_dist_and_sub_tx/utility=nyseg/year=2030/data.parquet + path_utility_assignment: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/metadata_utility/state=NY/utility_assignment.parquet + path_tariffs_gas: tariffs/gas + path_outputs: /data.sb/switchbox/cairo/outputs/hp_rates/ny/nyseg//ny_nyseg_run6_up00_precalc_supply__hp_seasonal_vs_flat + path_supply_energy_mc: s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2030/gea=NYISO/r=p127/data.parquet + path_supply_capacity_mc: s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2030/gea=NYISO/r=p127/data.parquet + path_tariffs_electric: + hp: /ebs/home/sherry_switch_box/rate-design-platform/rate_design/hp_rates/ny/config/tariffs/electric/adoption/nyca_electrification/year=2030/nyseg_hp_seasonal.json + non-hp: /ebs/home/sherry_switch_box/rate-design-platform/rate_design/hp_rates/ny/config/tariffs/electric/adoption/nyca_electrification/year=2030/nyseg_nonhp_flat.json + utility_revenue_requirement: null + run_includes_supply: true + run_includes_subclasses: true + path_electric_utility_stats: s3://data.sb/eia/861/electric_utility_stats/year=2024/state=NY/data.parquet + path_bulk_tx_mc: '' + solar_pv_compensation: net_metering + year_run: 2030 + year_dollar_conversion: 2025 + process_workers: 8 + 
elasticity: 0.0 + residual_cost_frac: 0.0 + + 301: + run_name: ny_nyseg_run1_up00_precalc_y2035_mixed__flat + state: NY + utility: nyseg + run_type: precalc + upgrade: '0' + path_tariff_maps_electric: tariff_maps/electric/nyseg_flat.csv + path_tariff_maps_gas: tariff_maps/gas/nyseg_u00.csv + path_resstock_metadata: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2035/metadata-sb.parquet + path_resstock_loads: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2035/load_curve_hourly/state=NY/upgrade=00 + path_dist_and_sub_tx_mc: s3://data.sb/switchbox/marginal_costs/ny/cambium_dist_and_sub_tx/utility=nyseg/year=2035/data.parquet + path_utility_assignment: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/metadata_utility/state=NY/utility_assignment.parquet + path_tariffs_gas: tariffs/gas + path_outputs: /data.sb/switchbox/cairo/outputs/hp_rates/ny/nyseg//ny_nyseg_run1_up00_precalc__flat + path_supply_energy_mc: s3://data.sb/switchbox/marginal_costs/ny/supply/energy/utility=nyseg/year=2035/zero.parquet + path_supply_capacity_mc: s3://data.sb/switchbox/marginal_costs/ny/supply/capacity/utility=nyseg/year=2035/zero.parquet + path_tariffs_electric: + all: tariffs/electric/nyseg_flat.json + utility_revenue_requirement: null + run_includes_supply: false + run_includes_subclasses: false + path_electric_utility_stats: s3://data.sb/eia/861/electric_utility_stats/year=2024/state=NY/data.parquet + path_bulk_tx_mc: '' + solar_pv_compensation: net_metering + year_run: 2035 + year_dollar_conversion: 2025 + process_workers: 8 + elasticity: 0.0 + residual_cost_frac: 0.0 + + 302: + run_name: ny_nyseg_run2_up00_precalc_supply_y2035_mixed__flat + state: NY + utility: nyseg + run_type: precalc + upgrade: '0' + path_tariff_maps_electric: tariff_maps/electric/nyseg_flat_supply.csv + path_tariff_maps_gas: tariff_maps/gas/nyseg_u00.csv + path_resstock_metadata: 
/ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2035/metadata-sb.parquet + path_resstock_loads: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2035/load_curve_hourly/state=NY/upgrade=00 + path_dist_and_sub_tx_mc: s3://data.sb/switchbox/marginal_costs/ny/cambium_dist_and_sub_tx/utility=nyseg/year=2035/data.parquet + path_utility_assignment: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/metadata_utility/state=NY/utility_assignment.parquet + path_tariffs_gas: tariffs/gas + path_outputs: /data.sb/switchbox/cairo/outputs/hp_rates/ny/nyseg//ny_nyseg_run2_up00_precalc_supply__flat + path_supply_energy_mc: s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2035/gea=NYISO/r=p127/data.parquet + path_supply_capacity_mc: s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2035/gea=NYISO/r=p127/data.parquet + path_tariffs_electric: + all: tariffs/electric/nyseg_flat_supply.json + utility_revenue_requirement: null + run_includes_supply: true + run_includes_subclasses: false + path_electric_utility_stats: s3://data.sb/eia/861/electric_utility_stats/year=2024/state=NY/data.parquet + path_bulk_tx_mc: '' + solar_pv_compensation: net_metering + year_run: 2035 + year_dollar_conversion: 2025 + process_workers: 8 + elasticity: 0.0 + residual_cost_frac: 0.0 + + 305: + run_name: ny_nyseg_run5_up00_precalc_y2035_mixed__hp_seasonal_vs_flat + state: NY + utility: nyseg + run_type: precalc + upgrade: '0' + path_tariff_maps_electric: tariff_maps/electric/nyseg_hp_seasonal_vs_flat.csv + path_tariff_maps_gas: tariff_maps/gas/nyseg_u00.csv + path_resstock_metadata: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2035/metadata-sb.parquet + path_resstock_loads: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2035/load_curve_hourly/state=NY/upgrade=00 + path_dist_and_sub_tx_mc: 
s3://data.sb/switchbox/marginal_costs/ny/cambium_dist_and_sub_tx/utility=nyseg/year=2035/data.parquet + path_utility_assignment: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/metadata_utility/state=NY/utility_assignment.parquet + path_tariffs_gas: tariffs/gas + path_outputs: /data.sb/switchbox/cairo/outputs/hp_rates/ny/nyseg//ny_nyseg_run5_up00_precalc__hp_seasonal_vs_flat + path_supply_energy_mc: s3://data.sb/switchbox/marginal_costs/ny/supply/energy/utility=nyseg/year=2035/zero.parquet + path_supply_capacity_mc: s3://data.sb/switchbox/marginal_costs/ny/supply/capacity/utility=nyseg/year=2035/zero.parquet + path_tariffs_electric: + hp: /ebs/home/sherry_switch_box/rate-design-platform/rate_design/hp_rates/ny/config/tariffs/electric/adoption/nyca_electrification/year=2035/nyseg_hp_seasonal.json + non-hp: /ebs/home/sherry_switch_box/rate-design-platform/rate_design/hp_rates/ny/config/tariffs/electric/adoption/nyca_electrification/year=2035/nyseg_nonhp_flat.json + utility_revenue_requirement: null + run_includes_supply: false + run_includes_subclasses: true + path_electric_utility_stats: s3://data.sb/eia/861/electric_utility_stats/year=2024/state=NY/data.parquet + path_bulk_tx_mc: '' + solar_pv_compensation: net_metering + year_run: 2035 + year_dollar_conversion: 2025 + process_workers: 8 + elasticity: 0.0 + residual_cost_frac: 0.0 + + 306: + run_name: ny_nyseg_run6_up00_precalc_supply_y2035_mixed__hp_seasonal_vs_flat + state: NY + utility: nyseg + run_type: precalc + upgrade: '0' + path_tariff_maps_electric: tariff_maps/electric/nyseg_hp_seasonal_vs_flat_supply.csv + path_tariff_maps_gas: tariff_maps/gas/nyseg_u00.csv + path_resstock_metadata: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2035/metadata-sb.parquet + path_resstock_loads: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2035/load_curve_hourly/state=NY/upgrade=00 + path_dist_and_sub_tx_mc: 
s3://data.sb/switchbox/marginal_costs/ny/cambium_dist_and_sub_tx/utility=nyseg/year=2035/data.parquet + path_utility_assignment: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/metadata_utility/state=NY/utility_assignment.parquet + path_tariffs_gas: tariffs/gas + path_outputs: /data.sb/switchbox/cairo/outputs/hp_rates/ny/nyseg//ny_nyseg_run6_up00_precalc_supply__hp_seasonal_vs_flat + path_supply_energy_mc: s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2035/gea=NYISO/r=p127/data.parquet + path_supply_capacity_mc: s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2035/gea=NYISO/r=p127/data.parquet + path_tariffs_electric: + hp: /ebs/home/sherry_switch_box/rate-design-platform/rate_design/hp_rates/ny/config/tariffs/electric/adoption/nyca_electrification/year=2035/nyseg_hp_seasonal.json + non-hp: /ebs/home/sherry_switch_box/rate-design-platform/rate_design/hp_rates/ny/config/tariffs/electric/adoption/nyca_electrification/year=2035/nyseg_nonhp_flat.json + utility_revenue_requirement: null + run_includes_supply: true + run_includes_subclasses: true + path_electric_utility_stats: s3://data.sb/eia/861/electric_utility_stats/year=2024/state=NY/data.parquet + path_bulk_tx_mc: '' + solar_pv_compensation: net_metering + year_run: 2035 + year_dollar_conversion: 2025 + process_workers: 8 + elasticity: 0.0 + residual_cost_frac: 0.0 + + 401: + run_name: ny_nyseg_run1_up00_precalc_y2040_mixed__flat + state: NY + utility: nyseg + run_type: precalc + upgrade: '0' + path_tariff_maps_electric: tariff_maps/electric/nyseg_flat.csv + path_tariff_maps_gas: tariff_maps/gas/nyseg_u00.csv + path_resstock_metadata: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2040/metadata-sb.parquet + path_resstock_loads: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2040/load_curve_hourly/state=NY/upgrade=00 + path_dist_and_sub_tx_mc: s3://data.sb/switchbox/marginal_costs/ny/cambium_dist_and_sub_tx/utility=nyseg/year=2040/data.parquet + 
path_utility_assignment: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/metadata_utility/state=NY/utility_assignment.parquet + path_tariffs_gas: tariffs/gas + path_outputs: /data.sb/switchbox/cairo/outputs/hp_rates/ny/nyseg//ny_nyseg_run1_up00_precalc__flat + path_supply_energy_mc: s3://data.sb/switchbox/marginal_costs/ny/supply/energy/utility=nyseg/year=2040/zero.parquet + path_supply_capacity_mc: s3://data.sb/switchbox/marginal_costs/ny/supply/capacity/utility=nyseg/year=2040/zero.parquet + path_tariffs_electric: + all: tariffs/electric/nyseg_flat.json + utility_revenue_requirement: null + run_includes_supply: false + run_includes_subclasses: false + path_electric_utility_stats: s3://data.sb/eia/861/electric_utility_stats/year=2024/state=NY/data.parquet + path_bulk_tx_mc: '' + solar_pv_compensation: net_metering + year_run: 2040 + year_dollar_conversion: 2025 + process_workers: 8 + elasticity: 0.0 + residual_cost_frac: 0.0 + + 402: + run_name: ny_nyseg_run2_up00_precalc_supply_y2040_mixed__flat + state: NY + utility: nyseg + run_type: precalc + upgrade: '0' + path_tariff_maps_electric: tariff_maps/electric/nyseg_flat_supply.csv + path_tariff_maps_gas: tariff_maps/gas/nyseg_u00.csv + path_resstock_metadata: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2040/metadata-sb.parquet + path_resstock_loads: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2040/load_curve_hourly/state=NY/upgrade=00 + path_dist_and_sub_tx_mc: s3://data.sb/switchbox/marginal_costs/ny/cambium_dist_and_sub_tx/utility=nyseg/year=2040/data.parquet + path_utility_assignment: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/metadata_utility/state=NY/utility_assignment.parquet + path_tariffs_gas: tariffs/gas + path_outputs: /data.sb/switchbox/cairo/outputs/hp_rates/ny/nyseg//ny_nyseg_run2_up00_precalc_supply__flat + path_supply_energy_mc: s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2040/gea=NYISO/r=p127/data.parquet + 
path_supply_capacity_mc: s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2040/gea=NYISO/r=p127/data.parquet + path_tariffs_electric: + all: tariffs/electric/nyseg_flat_supply.json + utility_revenue_requirement: null + run_includes_supply: true + run_includes_subclasses: false + path_electric_utility_stats: s3://data.sb/eia/861/electric_utility_stats/year=2024/state=NY/data.parquet + path_bulk_tx_mc: '' + solar_pv_compensation: net_metering + year_run: 2040 + year_dollar_conversion: 2025 + process_workers: 8 + elasticity: 0.0 + residual_cost_frac: 0.0 + + 405: + run_name: ny_nyseg_run5_up00_precalc_y2040_mixed__hp_seasonal_vs_flat + state: NY + utility: nyseg + run_type: precalc + upgrade: '0' + path_tariff_maps_electric: tariff_maps/electric/nyseg_hp_seasonal_vs_flat.csv + path_tariff_maps_gas: tariff_maps/gas/nyseg_u00.csv + path_resstock_metadata: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2040/metadata-sb.parquet + path_resstock_loads: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2040/load_curve_hourly/state=NY/upgrade=00 + path_dist_and_sub_tx_mc: s3://data.sb/switchbox/marginal_costs/ny/cambium_dist_and_sub_tx/utility=nyseg/year=2040/data.parquet + path_utility_assignment: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/metadata_utility/state=NY/utility_assignment.parquet + path_tariffs_gas: tariffs/gas + path_outputs: /data.sb/switchbox/cairo/outputs/hp_rates/ny/nyseg//ny_nyseg_run5_up00_precalc__hp_seasonal_vs_flat + path_supply_energy_mc: s3://data.sb/switchbox/marginal_costs/ny/supply/energy/utility=nyseg/year=2040/zero.parquet + path_supply_capacity_mc: s3://data.sb/switchbox/marginal_costs/ny/supply/capacity/utility=nyseg/year=2040/zero.parquet + path_tariffs_electric: + hp: /ebs/home/sherry_switch_box/rate-design-platform/rate_design/hp_rates/ny/config/tariffs/electric/adoption/nyca_electrification/year=2040/nyseg_hp_seasonal.json + non-hp: 
/ebs/home/sherry_switch_box/rate-design-platform/rate_design/hp_rates/ny/config/tariffs/electric/adoption/nyca_electrification/year=2040/nyseg_nonhp_flat.json + utility_revenue_requirement: null + run_includes_supply: false + run_includes_subclasses: true + path_electric_utility_stats: s3://data.sb/eia/861/electric_utility_stats/year=2024/state=NY/data.parquet + path_bulk_tx_mc: '' + solar_pv_compensation: net_metering + year_run: 2040 + year_dollar_conversion: 2025 + process_workers: 8 + elasticity: 0.0 + residual_cost_frac: 0.0 + + 406: + run_name: ny_nyseg_run6_up00_precalc_supply_y2040_mixed__hp_seasonal_vs_flat + state: NY + utility: nyseg + run_type: precalc + upgrade: '0' + path_tariff_maps_electric: tariff_maps/electric/nyseg_hp_seasonal_vs_flat_supply.csv + path_tariff_maps_gas: tariff_maps/gas/nyseg_u00.csv + path_resstock_metadata: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2040/metadata-sb.parquet + path_resstock_loads: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2040/load_curve_hourly/state=NY/upgrade=00 + path_dist_and_sub_tx_mc: s3://data.sb/switchbox/marginal_costs/ny/cambium_dist_and_sub_tx/utility=nyseg/year=2040/data.parquet + path_utility_assignment: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/metadata_utility/state=NY/utility_assignment.parquet + path_tariffs_gas: tariffs/gas + path_outputs: /data.sb/switchbox/cairo/outputs/hp_rates/ny/nyseg//ny_nyseg_run6_up00_precalc_supply__hp_seasonal_vs_flat + path_supply_energy_mc: s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2040/gea=NYISO/r=p127/data.parquet + path_supply_capacity_mc: s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2040/gea=NYISO/r=p127/data.parquet + path_tariffs_electric: + hp: /ebs/home/sherry_switch_box/rate-design-platform/rate_design/hp_rates/ny/config/tariffs/electric/adoption/nyca_electrification/year=2040/nyseg_hp_seasonal.json + non-hp: 
/ebs/home/sherry_switch_box/rate-design-platform/rate_design/hp_rates/ny/config/tariffs/electric/adoption/nyca_electrification/year=2040/nyseg_nonhp_flat.json + utility_revenue_requirement: null + run_includes_supply: true + run_includes_subclasses: true + path_electric_utility_stats: s3://data.sb/eia/861/electric_utility_stats/year=2024/state=NY/data.parquet + path_bulk_tx_mc: '' + solar_pv_compensation: net_metering + year_run: 2040 + year_dollar_conversion: 2025 + process_workers: 8 + elasticity: 0.0 + residual_cost_frac: 0.0 + + 501: + run_name: ny_nyseg_run1_up00_precalc_y2045_mixed__flat + state: NY + utility: nyseg + run_type: precalc + upgrade: '0' + path_tariff_maps_electric: tariff_maps/electric/nyseg_flat.csv + path_tariff_maps_gas: tariff_maps/gas/nyseg_u00.csv + path_resstock_metadata: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2045/metadata-sb.parquet + path_resstock_loads: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2045/load_curve_hourly/state=NY/upgrade=00 + path_dist_and_sub_tx_mc: s3://data.sb/switchbox/marginal_costs/ny/cambium_dist_and_sub_tx/utility=nyseg/year=2045/data.parquet + path_utility_assignment: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/metadata_utility/state=NY/utility_assignment.parquet + path_tariffs_gas: tariffs/gas + path_outputs: /data.sb/switchbox/cairo/outputs/hp_rates/ny/nyseg//ny_nyseg_run1_up00_precalc__flat + path_supply_energy_mc: s3://data.sb/switchbox/marginal_costs/ny/supply/energy/utility=nyseg/year=2045/zero.parquet + path_supply_capacity_mc: s3://data.sb/switchbox/marginal_costs/ny/supply/capacity/utility=nyseg/year=2045/zero.parquet + path_tariffs_electric: + all: tariffs/electric/nyseg_flat.json + utility_revenue_requirement: null + run_includes_supply: false + run_includes_subclasses: false + path_electric_utility_stats: s3://data.sb/eia/861/electric_utility_stats/year=2024/state=NY/data.parquet + path_bulk_tx_mc: '' + 
solar_pv_compensation: net_metering + year_run: 2045 + year_dollar_conversion: 2025 + process_workers: 8 + elasticity: 0.0 + residual_cost_frac: 0.0 + + 502: + run_name: ny_nyseg_run2_up00_precalc_supply_y2045_mixed__flat + state: NY + utility: nyseg + run_type: precalc + upgrade: '0' + path_tariff_maps_electric: tariff_maps/electric/nyseg_flat_supply.csv + path_tariff_maps_gas: tariff_maps/gas/nyseg_u00.csv + path_resstock_metadata: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2045/metadata-sb.parquet + path_resstock_loads: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2045/load_curve_hourly/state=NY/upgrade=00 + path_dist_and_sub_tx_mc: s3://data.sb/switchbox/marginal_costs/ny/cambium_dist_and_sub_tx/utility=nyseg/year=2045/data.parquet + path_utility_assignment: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/metadata_utility/state=NY/utility_assignment.parquet + path_tariffs_gas: tariffs/gas + path_outputs: /data.sb/switchbox/cairo/outputs/hp_rates/ny/nyseg//ny_nyseg_run2_up00_precalc_supply__flat + path_supply_energy_mc: s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2045/gea=NYISO/r=p127/data.parquet + path_supply_capacity_mc: s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2045/gea=NYISO/r=p127/data.parquet + path_tariffs_electric: + all: tariffs/electric/nyseg_flat_supply.json + utility_revenue_requirement: null + run_includes_supply: true + run_includes_subclasses: false + path_electric_utility_stats: s3://data.sb/eia/861/electric_utility_stats/year=2024/state=NY/data.parquet + path_bulk_tx_mc: '' + solar_pv_compensation: net_metering + year_run: 2045 + year_dollar_conversion: 2025 + process_workers: 8 + elasticity: 0.0 + residual_cost_frac: 0.0 + + 505: + run_name: ny_nyseg_run5_up00_precalc_y2045_mixed__hp_seasonal_vs_flat + state: NY + utility: nyseg + run_type: precalc + upgrade: '0' + path_tariff_maps_electric: tariff_maps/electric/nyseg_hp_seasonal_vs_flat.csv + 
path_tariff_maps_gas: tariff_maps/gas/nyseg_u00.csv + path_resstock_metadata: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2045/metadata-sb.parquet + path_resstock_loads: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2045/load_curve_hourly/state=NY/upgrade=00 + path_dist_and_sub_tx_mc: s3://data.sb/switchbox/marginal_costs/ny/cambium_dist_and_sub_tx/utility=nyseg/year=2045/data.parquet + path_utility_assignment: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/metadata_utility/state=NY/utility_assignment.parquet + path_tariffs_gas: tariffs/gas + path_outputs: /data.sb/switchbox/cairo/outputs/hp_rates/ny/nyseg//ny_nyseg_run5_up00_precalc__hp_seasonal_vs_flat + path_supply_energy_mc: s3://data.sb/switchbox/marginal_costs/ny/supply/energy/utility=nyseg/year=2045/zero.parquet + path_supply_capacity_mc: s3://data.sb/switchbox/marginal_costs/ny/supply/capacity/utility=nyseg/year=2045/zero.parquet + path_tariffs_electric: + hp: /ebs/home/sherry_switch_box/rate-design-platform/rate_design/hp_rates/ny/config/tariffs/electric/adoption/nyca_electrification/year=2045/nyseg_hp_seasonal.json + non-hp: /ebs/home/sherry_switch_box/rate-design-platform/rate_design/hp_rates/ny/config/tariffs/electric/adoption/nyca_electrification/year=2045/nyseg_nonhp_flat.json + utility_revenue_requirement: null + run_includes_supply: false + run_includes_subclasses: true + path_electric_utility_stats: s3://data.sb/eia/861/electric_utility_stats/year=2024/state=NY/data.parquet + path_bulk_tx_mc: '' + solar_pv_compensation: net_metering + year_run: 2045 + year_dollar_conversion: 2025 + process_workers: 8 + elasticity: 0.0 + residual_cost_frac: 0.0 + + 506: + run_name: ny_nyseg_run6_up00_precalc_supply_y2045_mixed__hp_seasonal_vs_flat + state: NY + utility: nyseg + run_type: precalc + upgrade: '0' + path_tariff_maps_electric: tariff_maps/electric/nyseg_hp_seasonal_vs_flat_supply.csv + path_tariff_maps_gas: 
tariff_maps/gas/nyseg_u00.csv + path_resstock_metadata: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2045/metadata-sb.parquet + path_resstock_loads: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2045/load_curve_hourly/state=NY/upgrade=00 + path_dist_and_sub_tx_mc: s3://data.sb/switchbox/marginal_costs/ny/cambium_dist_and_sub_tx/utility=nyseg/year=2045/data.parquet + path_utility_assignment: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/metadata_utility/state=NY/utility_assignment.parquet + path_tariffs_gas: tariffs/gas + path_outputs: /data.sb/switchbox/cairo/outputs/hp_rates/ny/nyseg//ny_nyseg_run6_up00_precalc_supply__hp_seasonal_vs_flat + path_supply_energy_mc: s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2045/gea=NYISO/r=p127/data.parquet + path_supply_capacity_mc: s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2045/gea=NYISO/r=p127/data.parquet + path_tariffs_electric: + hp: /ebs/home/sherry_switch_box/rate-design-platform/rate_design/hp_rates/ny/config/tariffs/electric/adoption/nyca_electrification/year=2045/nyseg_hp_seasonal.json + non-hp: /ebs/home/sherry_switch_box/rate-design-platform/rate_design/hp_rates/ny/config/tariffs/electric/adoption/nyca_electrification/year=2045/nyseg_nonhp_flat.json + utility_revenue_requirement: null + run_includes_supply: true + run_includes_subclasses: true + path_electric_utility_stats: s3://data.sb/eia/861/electric_utility_stats/year=2024/state=NY/data.parquet + path_bulk_tx_mc: '' + solar_pv_compensation: net_metering + year_run: 2045 + year_dollar_conversion: 2025 + process_workers: 8 + elasticity: 0.0 + residual_cost_frac: 0.0 + + 601: + run_name: ny_nyseg_run1_up00_precalc_y2050_mixed__flat + state: NY + utility: nyseg + run_type: precalc + upgrade: '0' + path_tariff_maps_electric: tariff_maps/electric/nyseg_flat.csv + path_tariff_maps_gas: tariff_maps/gas/nyseg_u00.csv + path_resstock_metadata: 
/ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2050/metadata-sb.parquet + path_resstock_loads: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2050/load_curve_hourly/state=NY/upgrade=00 + path_dist_and_sub_tx_mc: s3://data.sb/switchbox/marginal_costs/ny/cambium_dist_and_sub_tx/utility=nyseg/year=2050/data.parquet + path_utility_assignment: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/metadata_utility/state=NY/utility_assignment.parquet + path_tariffs_gas: tariffs/gas + path_outputs: /data.sb/switchbox/cairo/outputs/hp_rates/ny/nyseg//ny_nyseg_run1_up00_precalc__flat + path_supply_energy_mc: s3://data.sb/switchbox/marginal_costs/ny/supply/energy/utility=nyseg/year=2050/zero.parquet + path_supply_capacity_mc: s3://data.sb/switchbox/marginal_costs/ny/supply/capacity/utility=nyseg/year=2050/zero.parquet + path_tariffs_electric: + all: tariffs/electric/nyseg_flat.json + utility_revenue_requirement: null + run_includes_supply: false + run_includes_subclasses: false + path_electric_utility_stats: s3://data.sb/eia/861/electric_utility_stats/year=2024/state=NY/data.parquet + path_bulk_tx_mc: '' + solar_pv_compensation: net_metering + year_run: 2050 + year_dollar_conversion: 2025 + process_workers: 8 + elasticity: 0.0 + residual_cost_frac: 0.0 + + 602: + run_name: ny_nyseg_run2_up00_precalc_supply_y2050_mixed__flat + state: NY + utility: nyseg + run_type: precalc + upgrade: '0' + path_tariff_maps_electric: tariff_maps/electric/nyseg_flat_supply.csv + path_tariff_maps_gas: tariff_maps/gas/nyseg_u00.csv + path_resstock_metadata: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2050/metadata-sb.parquet + path_resstock_loads: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2050/load_curve_hourly/state=NY/upgrade=00 + path_dist_and_sub_tx_mc: s3://data.sb/switchbox/marginal_costs/ny/cambium_dist_and_sub_tx/utility=nyseg/year=2050/data.parquet + 
path_utility_assignment: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/metadata_utility/state=NY/utility_assignment.parquet + path_tariffs_gas: tariffs/gas + path_outputs: /data.sb/switchbox/cairo/outputs/hp_rates/ny/nyseg//ny_nyseg_run2_up00_precalc_supply_y2050_mixed__flat + path_supply_energy_mc: s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2050/gea=NYISO/r=p127/data.parquet + path_supply_capacity_mc: s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2050/gea=NYISO/r=p127/data.parquet + path_tariffs_electric: + all: tariffs/electric/nyseg_flat_supply.json + utility_revenue_requirement: null + run_includes_supply: true + run_includes_subclasses: false + path_electric_utility_stats: s3://data.sb/eia/861/electric_utility_stats/year=2024/state=NY/data.parquet + path_bulk_tx_mc: '' + solar_pv_compensation: net_metering + year_run: 2050 + year_dollar_conversion: 2025 + process_workers: 8 + elasticity: 0.0 + residual_cost_frac: 0.0 + + 605: + run_name: ny_nyseg_run5_up00_precalc_y2050_mixed__hp_seasonal_vs_flat + state: NY + utility: nyseg + run_type: precalc + upgrade: '0' + path_tariff_maps_electric: tariff_maps/electric/nyseg_hp_seasonal_vs_flat.csv + path_tariff_maps_gas: tariff_maps/gas/nyseg_u00.csv + path_resstock_metadata: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2050/metadata-sb.parquet + path_resstock_loads: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2050/load_curve_hourly/state=NY/upgrade=00 + path_dist_and_sub_tx_mc: s3://data.sb/switchbox/marginal_costs/ny/cambium_dist_and_sub_tx/utility=nyseg/year=2050/data.parquet + path_utility_assignment: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/metadata_utility/state=NY/utility_assignment.parquet + path_tariffs_gas: tariffs/gas + path_outputs: /data.sb/switchbox/cairo/outputs/hp_rates/ny/nyseg//ny_nyseg_run5_up00_precalc_y2050_mixed__hp_seasonal_vs_flat + path_supply_energy_mc: 
s3://data.sb/switchbox/marginal_costs/ny/supply/energy/utility=nyseg/year=2050/zero.parquet + path_supply_capacity_mc: s3://data.sb/switchbox/marginal_costs/ny/supply/capacity/utility=nyseg/year=2050/zero.parquet + path_tariffs_electric: + hp: /ebs/home/sherry_switch_box/rate-design-platform/rate_design/hp_rates/ny/config/tariffs/electric/adoption/nyca_electrification/year=2050/nyseg_hp_seasonal.json + non-hp: /ebs/home/sherry_switch_box/rate-design-platform/rate_design/hp_rates/ny/config/tariffs/electric/adoption/nyca_electrification/year=2050/nyseg_nonhp_flat.json + utility_revenue_requirement: null + run_includes_supply: false + run_includes_subclasses: true + path_electric_utility_stats: s3://data.sb/eia/861/electric_utility_stats/year=2024/state=NY/data.parquet + path_bulk_tx_mc: '' + solar_pv_compensation: net_metering + year_run: 2050 + year_dollar_conversion: 2025 + process_workers: 8 + elasticity: 0.0 + residual_cost_frac: 0.0 + + 606: + run_name: ny_nyseg_run6_up00_precalc_supply_y2050_mixed__hp_seasonal_vs_flat + state: NY + utility: nyseg + run_type: precalc + upgrade: '0' + path_tariff_maps_electric: tariff_maps/electric/nyseg_hp_seasonal_vs_flat_supply.csv + path_tariff_maps_gas: tariff_maps/gas/nyseg_u00.csv + path_resstock_metadata: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2050/metadata-sb.parquet + path_resstock_loads: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/adoption/nyca_electrification/year=2050/load_curve_hourly/state=NY/upgrade=00 + path_dist_and_sub_tx_mc: s3://data.sb/switchbox/marginal_costs/ny/cambium_dist_and_sub_tx/utility=nyseg/year=2050/data.parquet + path_utility_assignment: /ebs/data/nrel/resstock/res_2024_amy2018_2_sb/metadata_utility/state=NY/utility_assignment.parquet + path_tariffs_gas: tariffs/gas + path_outputs: /data.sb/switchbox/cairo/outputs/hp_rates/ny/nyseg//ny_nyseg_run6_up00_precalc_supply_y2050_mixed__hp_seasonal_vs_flat + path_supply_energy_mc: 
s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2050/gea=NYISO/r=p127/data.parquet + path_supply_capacity_mc: s3://data.sb/nrel/cambium/2024/scenario=MidCase/t=2050/gea=NYISO/r=p127/data.parquet + path_tariffs_electric: + hp: /ebs/home/sherry_switch_box/rate-design-platform/rate_design/hp_rates/ny/config/tariffs/electric/adoption/nyca_electrification/year=2050/nyseg_hp_seasonal.json + non-hp: /ebs/home/sherry_switch_box/rate-design-platform/rate_design/hp_rates/ny/config/tariffs/electric/adoption/nyca_electrification/year=2050/nyseg_nonhp_flat.json + utility_revenue_requirement: null + run_includes_supply: true + run_includes_subclasses: true + path_electric_utility_stats: s3://data.sb/eia/861/electric_utility_stats/year=2024/state=NY/data.parquet + path_bulk_tx_mc: '' + solar_pv_compensation: net_metering + year_run: 2050 + year_dollar_conversion: 2025 + process_workers: 8 + elasticity: 0.0 + residual_cost_frac: 0.0