Commit 12df117

refactor for clarity and better naming / org
Parent: 26e08cb

13 files changed (+539, -777 lines)

.gitignore

Lines changed: 3 additions & 0 deletions
```diff
@@ -13,6 +13,9 @@ rate_design/ny/hp_rates/data/cairo_cases/
 outputs/
 site/
 
+# Test outputs (generated during tests)
+tests/test_outputs/
+
 # Python packaging/build artifacts
 *.pyc
 *.pyo
```
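The new ignore rule implies the suite writes real files to disk during test runs. A minimal sketch of the kind of test that would populate this directory, assuming pytest discovery and the polars library; the file name, columns, and test are hypothetical, not taken from this commit:

```python
# tests/test_scenario_outputs.py (hypothetical)
from pathlib import Path

import polars as pl

# The directory covered by the new .gitignore rule
TEST_OUTPUT_DIR = Path(__file__).parent / "test_outputs"


def test_scenario_parquet_roundtrip():
    TEST_OUTPUT_DIR.mkdir(exist_ok=True)
    out_path = TEST_OUTPUT_DIR / "scenario_smoke.parquet"

    # Write a tiny scenario-shaped frame, then read it back unchanged
    df = pl.DataFrame({"bldg_id": [1, 2, 3], "adopted": [0, 1, 1]})
    df.write_parquet(out_path)

    assert pl.read_parquet(out_path).equals(df)
```

Ignoring the directory rather than cleaning it up in a fixture keeps failed-run artifacts around for debugging without polluting git status.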

rate_design/ny/hp_rates/scripts/generate_ny_hp_scenarios.py

Lines changed: 33 additions & 90 deletions
```diff
@@ -3,11 +3,9 @@
 
 from pathlib import Path
 
-from utils.buildstock_io import get_load_curve_dir, get_metadata_path
-from utils.resstock_cumulative_adoption import (
-    build_cohort_table,
-    build_multiple_adoption_scenarios,
-    fetch_baseline_and_upgrade_data,
+from utils.mixed_adoption_trajectory import (
+    build_adoption_trajectory,
+    fetch_baseline_sample,
 )
 
 # Base data directory for NY HP rates (git-ignored raw/processed, configs versioned)
@@ -25,15 +23,13 @@
     # Download settings
     "output_dir": BASE_DATA_DIR / "buildstock_raw",
     "max_workers": 5,
-    # Sampling settings (set to None to use all buildings)
-    "sample_size": 1000,  # Number of buildings to sample (None = all buildings)
-    "sample_seed": 123,  # Seed for sampling reproducibility
+    # Sampling settings
+    "sample_size": 1000,  # Number of buildings to sample
+    "sample_seed": 123,  # Seed for sampling reproducibility (determines building ordering)
     # Adoption scenario settings
     "adoption_fractions": [0.1, 0.2, 0.3, 0.5, 0.8, 1.0],
-    "random_seed": 42,  # For reproducibility
     # Output settings
     "processed_dir": BASE_DATA_DIR / "buildstock_processed",
-    "cohort_table_path": BASE_DATA_DIR / "buildstock_processed" / "cohort_table.parquet",
 }
 
 
@@ -47,119 +43,66 @@ def main():
         print(f"  {key}: {value}")
     print("\n")
 
-    # Step 1: Fetch baseline and upgrade data
+    # Step 1: Fetch baseline sample and establish building ID ordering
     print("\n" + "=" * 80)
-    print("STEP 1: Fetching baseline and heat pump upgrade data")
+    print("STEP 1: Fetching baseline sample")
     print("=" * 80)
+    print(f"Fetching {CONFIG['sample_size']} baseline buildings (seed={CONFIG['sample_seed']})")
 
-    baseline_paths, hp_paths, failed = fetch_baseline_and_upgrade_data(
+    baseline_metadata_path, building_ids = fetch_baseline_sample(
+        sample_size=CONFIG["sample_size"],
+        random_seed=CONFIG["sample_seed"],
         release_year=CONFIG["release_year"],
         weather_file=CONFIG["weather_file"],
         release_version=CONFIG["release_version"],
         state=CONFIG["state"],
-        hp_upgrade_id=CONFIG["hp_upgrade_id"],
         output_dir=CONFIG["output_dir"],
         max_workers=CONFIG["max_workers"],
-        sample_size=CONFIG["sample_size"],
-        random_seed=CONFIG["sample_seed"],
     )
 
-    if failed:
-        print(f"\n⚠️ Warning: {len(failed)} files failed to download:")
-        for f in failed[:5]:  # Show first 5
-            print(f"  - {f}")
-        if len(failed) > 5:
-            print(f"  ... and {len(failed) - 5} more")
-
-    print(f"\n✓ Downloaded {len(baseline_paths)} baseline files")
-    print(f"✓ Downloaded {len(hp_paths)} HP upgrade files")
+    print(f"\n✓ Fetched {len(building_ids)} baseline buildings")
+    print(f"✓ Baseline metadata: {baseline_metadata_path}")
+    print(f"✓ Building ID ordering established (deterministic from seed)")
 
-    # Step 2: Build cohort table
+    # Step 2: Build adoption trajectory
     print("\n" + "=" * 80)
-    print("STEP 2: Building cohort table")
+    print("STEP 2: Building adoption trajectory")
     print("=" * 80)
+    print(f"Creating scenarios for adoption fractions: {CONFIG['adoption_fractions']}")
+    print("Note: Upgrade data will be fetched incrementally for each fraction")
 
-    # Get metadata paths
-    baseline_metadata = get_metadata_path(
-        output_dir=CONFIG["output_dir"],
-        release_year=CONFIG["release_year"],
-        weather_file=CONFIG["weather_file"],
-        release_version=CONFIG["release_version"],
-        upgrade_id="0",
-    )
-
-    hp_metadata = get_metadata_path(
-        output_dir=CONFIG["output_dir"],
-        release_year=CONFIG["release_year"],
-        weather_file=CONFIG["weather_file"],
-        release_version=CONFIG["release_version"],
+    scenario_paths = build_adoption_trajectory(
+        baseline_metadata_path=baseline_metadata_path,
+        baseline_building_ids=building_ids,
+        adoption_fractions=CONFIG["adoption_fractions"],
         upgrade_id=CONFIG["hp_upgrade_id"],
-    )
-
-    # Get load curve directories
-    baseline_load_dir = get_load_curve_dir(
-        output_dir=CONFIG["output_dir"],
         release_year=CONFIG["release_year"],
         weather_file=CONFIG["weather_file"],
         release_version=CONFIG["release_version"],
+        state=CONFIG["state"],
+        output_dir=CONFIG["output_dir"],
+        max_workers=CONFIG["max_workers"],
+        output_processed_dir=CONFIG["processed_dir"],
    )
 
-    hp_load_dir = baseline_load_dir  # Same directory, different upgrade subdirs
-
-    print(f"Baseline metadata: {baseline_metadata}")
-    print(f"HP metadata: {hp_metadata}")
-    print(f"Load curve directory: {baseline_load_dir}")
-
-    cohort_df = build_cohort_table(
-        baseline_metadata_path=baseline_metadata,
-        hp_metadata_path=hp_metadata,
-        baseline_load_dir=baseline_load_dir,
-        hp_load_dir=hp_load_dir,
-        output_path=CONFIG["cohort_table_path"],
-    )
-
-    print(f"\n✓ Built cohort table with {len(cohort_df)} buildings")
-    print(f"✓ Saved to {CONFIG['cohort_table_path']}")
-
-    # Show sample of cohort table
-    print("\nCohort table sample:")
-    print(cohort_df.head(3))
-
-    # Show heat pump type distribution if available
-    if "hp_type" in cohort_df.columns:
-        print("\nHeat pump type distribution:")
-        hp_type_counts = cohort_df.group_by("hp_type").count().sort("count", descending=True)
-        for row in hp_type_counts.iter_rows(named=True):
-            print(f"  {row['hp_type']}: {row['count']} buildings")
-
-    # Step 3: Build multiple adoption scenarios
-    print("\n" + "=" * 80)
-    print("STEP 3: Building adoption scenarios")
-    print("=" * 80)
-
-    scenario_paths = build_multiple_adoption_scenarios(
-        cohort_df=cohort_df,
-        adoption_fractions=CONFIG["adoption_fractions"],
-        seed=CONFIG["random_seed"],
-        output_dir=CONFIG["processed_dir"],
-        include_metadata=True,
-    )
-
+    # Summary
     print("\n" + "=" * 80)
     print("COMPLETE - Scenario Summary")
     print("=" * 80)
     print(f"\nGenerated {len(scenario_paths)} adoption scenarios:")
     for fraction, path in sorted(scenario_paths.items()):
-        print(f"  {fraction*100:3.0f}% adoption → {path}")
+        n_adopters = int(round(fraction * len(building_ids)))
+        print(f"  {fraction*100:3.0f}% adoption ({n_adopters:4d} buildings) → {path.name}")
 
-    print(f"\nAll scenarios use the same random seed ({CONFIG['random_seed']}) ensuring:")
+    print(f"\nAll scenarios use seed {CONFIG['sample_seed']} ensuring:")
     print("  - Reproducibility: Re-running with same seed gives identical results")
     print("  - Cumulative property: Adopters at X% ⊆ Adopters at Y% for X < Y")
+    print("  - Efficiency: Upgrade data fetched only for buildings that adopt")
 
     print("\nNext steps:")
     print("  - Load scenarios with: pl.read_parquet(path)")
+    print("  - Check 'adopted' column (0=baseline, 1=upgrade)")
     print("  - Use for GenX/CAIRO modeling")
-    print("  - Analyze adoption trajectories")
     print("\n✓ Done!")
 
 
```
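The summary lines above state the two properties the refactor hinges on: the building ID ordering is deterministic in `sample_seed`, and adopter sets are cumulative across fractions. The mechanism that gives both at once is taking prefixes of one seeded shuffle. A standalone sketch of that idea (not the `utils.mixed_adoption_trajectory` implementation, whose internals are not shown in this diff):

```python
import random

# Stand-ins for the 1000 sampled buildings and CONFIG["sample_seed"]
building_ids = list(range(1, 1001))
ordering = building_ids[:]
random.Random(123).shuffle(ordering)  # one seed -> one fixed ordering


def adopters(fraction: float) -> set[int]:
    """The first `fraction` of the fixed ordering adopts the upgrade."""
    n = int(round(fraction * len(ordering)))
    return set(ordering[:n])


# Prefixes of a single ordering nest, so adopters at X% ⊆ adopters at Y%
# for X < Y, and moving to a higher fraction only requires fetching upgrade
# data for the newly added buildings.
assert adopters(0.1) <= adopters(0.2) <= adopters(0.5) <= adopters(1.0)
```

Consuming a generated scenario then follows the script's own next steps: `pl.read_parquet(path)` plus a check of the `adopted` column (0 = baseline load, 1 = heat pump upgrade load).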
tests/test_cohort_builder.py

Lines changed: 0 additions & 204 deletions
This file was deleted.
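The deleted tests covered the retired cohort-builder path. A hypothetical sketch of a replacement property test for the new outputs, asserting the cumulative invariant the script prints; the `bldg_id` column name and the `scenario_paths` fixture (mapping fraction to parquet path) are assumptions, while `adopted` comes from the diff above:

```python
import polars as pl


def adopter_set(path) -> set:
    """Building IDs flagged as adopters in one scenario parquet."""
    df = pl.read_parquet(path)
    return set(df.filter(pl.col("adopted") == 1).get_column("bldg_id"))


def test_adopters_are_cumulative(scenario_paths):  # assumed fixture
    fractions = sorted(scenario_paths)
    for low, high in zip(fractions, fractions[1:]):
        assert adopter_set(scenario_paths[low]) <= adopter_set(scenario_paths[high])
```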
