Skip to content

Commit 45a3012

Browse files
committed
Align buildstock IO helpers with buildstock-fetch outputs
1 parent 36f158f commit 45a3012

File tree

2 files changed

+200
-2
lines changed

2 files changed

+200
-2
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ rate_design/ny/hp_rates/data/cairo_cases/
1313
outputs/
1414
site/
1515

16+
# Test outputs (generated during tests)
17+
tests/test_outputs/
18+
1619
# Python packaging/build artifacts
1720
*.pyc
1821
*.pyo

utils/buildstock_io.py

Lines changed: 197 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,198 @@
1-
"""Stub IO helpers for working with BuildStock artifacts in data/ny/."""
1+
"""IO helpers for working with BuildStock artifacts."""
22

3-
# TODO: add load/save helpers for raw and processed BuildStock data.
3+
from pathlib import Path
4+
from typing import Optional
5+
6+
import numpy as np
7+
from buildstock_fetch.main import BuildingID, fetch_bldg_data, fetch_bldg_ids
8+
9+
10+
def get_buildstock_release_dir(
    output_dir: Path,
    release_year: str = "2024",
    weather_file: str = "tmy3",
    release_version: str = "2",
) -> Path:
    """Build the per-release directory that buildstock-fetch writes into.

    The directory name is assembled as
    ``res_{release_year}_{weather_file}_{release_version}`` and appended to
    ``output_dir``. Nothing is created on disk; this only computes the path.

    Args:
        output_dir: Root directory passed to buildstock-fetch.
        release_year: Release year component of the directory name.
        weather_file: Weather file component of the directory name.
        release_version: Release version component of the directory name.

    Returns:
        ``output_dir / "res_<year>_<weather>_<version>"``, e.g.
        ``tests/test_data/res_2024_tmy3_2``.
    """
    name_parts = ("res", release_year, weather_file, release_version)
    # str() mirrors f-string interpolation so non-str components still work.
    return output_dir / "_".join(str(part) for part in name_parts)
25+
26+
27+
def get_metadata_path(
    output_dir: Path,
    release_year: str = "2024",
    weather_file: str = "tmy3",
    release_version: str = "2",
    upgrade_id: str = "0",
    state: str = "NY",
) -> Path:
    """Locate the metadata parquet file for one state/upgrade of a release.

    Layout:
        {release_dir}/metadata/state={state}/upgrade={NN}/metadata.parquet

    where ``NN`` is ``upgrade_id`` zero-padded to two digits
    (``"0"`` -> ``"00"``, ``"1"`` -> ``"01"``).

    Args:
        output_dir: Root directory passed to buildstock-fetch.
        release_year: Release year used to name the release directory.
        weather_file: Weather file used to name the release directory.
        release_version: Release version used to name the release directory.
        upgrade_id: Upgrade identifier; must be parseable by ``int()``.
        state: State abbreviation used in the ``state=`` partition.

    Returns:
        Path to ``metadata.parquet``; the file is not checked for existence.
    """
    state_partition = f"state={state}"
    upgrade_partition = "upgrade=" + format(int(upgrade_id), "02d")
    base = get_buildstock_release_dir(output_dir, release_year, weather_file, release_version)
    return base.joinpath("metadata", state_partition, upgrade_partition, "metadata.parquet")
43+
44+
45+
def get_load_curve_dir(
    output_dir: Path,
    release_year: str = "2024",
    weather_file: str = "tmy3",
    release_version: str = "2",
    curve_subdir: str = "load_curve_hourly",
) -> Path:
    """Return the load-curve directory inside a release directory.

    Args:
        output_dir: Root directory passed to buildstock-fetch.
        release_year: Release year used to name the release directory.
        weather_file: Weather file used to name the release directory.
        release_version: Release version used to name the release directory.
        curve_subdir: Sub-directory holding the curves (hourly by default;
            pass a different name for other resolutions such as 15-minute).

    Returns:
        ``{release_dir}/{curve_subdir}``; the directory is not created.
    """
    return (
        get_buildstock_release_dir(output_dir, release_year, weather_file, release_version)
        / curve_subdir
    )
55+
56+
57+
def get_load_curve_path(
    load_curve_dir: Path,
    bldg_id: int,
    state: str = "NY",
    upgrade_id: str = "0",
) -> Path:
    """Return path to the load curve parquet file for a building/upgrade.

    Structure:
        {load_curve_dir}/state={state}/upgrade={NN}/{bldg_id}-{N}.parquet

    The upgrade directory is zero-padded to 2 digits, while the filename uses
    the unpadded ID. Example: ``state=NY/upgrade=00/352381-0.parquet``.

    Args:
        load_curve_dir: Directory containing the load curves.
        bldg_id: Building identifier used as the filename prefix.
        state: State abbreviation used in the ``state=`` partition.
        upgrade_id: Upgrade identifier; must be parseable by ``int()``.
            Padded and unpadded spellings ("1", "01") give the same path.

    Returns:
        Path to the parquet file; the file is not checked for existence.

    Raises:
        ValueError: If ``upgrade_id`` cannot be converted to an integer.
    """
    # Normalise once so the directory (padded) and filename (unpadded) agree
    # even when the caller passes an already-padded ID such as "01".
    upgrade_number = int(upgrade_id)
    upgrade_padded = f"{upgrade_number:02d}"
    filename = f"{bldg_id}-{upgrade_number}.parquet"
    return load_curve_dir / f"state={state}" / f"upgrade={upgrade_padded}" / filename
72+
73+
74+
# ------------------------------------------------------------------------------
75+
# BuildStock-fetch interface
76+
# ------------------------------------------------------------------------------
77+
78+
def fetch_sample(
    *,
    upgrade_id: str = "0",
    release_year: str = "2024",
    weather_file: str = "tmy3",
    release_version: str = "2",
    state: str = "NY",
    output_dir: Path = Path("./rate_design/ny/hp_rates/data/buildstock_raw"),
    max_workers: int = 5,
    sample_size: Optional[int] = None,
    random_seed: Optional[int] = None,
    file_type: tuple[str, ...] = ("metadata", "load_curve_hourly"),
) -> tuple[list[Path], list[str]]:
    """Download data for all, or a random subset of, buildings in an upgrade.

    Looks up every building ID that buildstock-fetch reports for the given
    release/state/upgrade, optionally narrows that list to ``sample_size``
    randomly chosen buildings, and then downloads the requested file types.

    Args:
        upgrade_id: Upgrade ID to fetch (e.g., "0" for baseline).
        release_year: ResStock release year (e.g., "2024").
        weather_file: Weather file type (e.g., "tmy3").
        release_version: Release version number (e.g., "2").
        state: State abbreviation (e.g., "NY").
        output_dir: Directory to save downloaded files; created if missing.
        max_workers: Number of parallel download workers.
        sample_size: Number of buildings to sample. ``None``, or a value
            not smaller than the number of available buildings, fetches all.
        random_seed: Seed for the sampling RNG, for reproducibility.
        file_type: File types to fetch.

    Returns:
        Tuple of (downloaded_paths, failed_building_ids).
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Every building available for this release/state/upgrade.
    release_query = {
        "product": "resstock",
        "release_year": release_year,
        "weather_file": weather_file,
        "release_version": release_version,
        "state": state,
        "upgrade_id": upgrade_id,
    }
    candidates = fetch_bldg_ids(**release_query)

    subset_requested = sample_size is not None and sample_size < len(candidates)
    if subset_requested:
        generator = np.random.default_rng(random_seed)
        id_pool = [candidate.bldg_id for candidate in candidates]
        chosen = set(generator.choice(id_pool, size=sample_size, replace=False))
        # Filter rather than re-order so the original listing order is kept.
        candidates = [candidate for candidate in candidates if candidate.bldg_id in chosen]

    downloaded, failures = fetch_bldg_data(
        bldg_ids=candidates,
        file_type=file_type,
        output_dir=output_dir,
        max_workers=max_workers,
    )
    return downloaded, failures
140+
141+
142+
def fetch_for_building_ids(
    *,
    building_ids: list[int],
    upgrade_id: str,
    release_year: str = "2024",
    weather_file: str = "tmy3",
    release_version: str = "2",
    state: str = "NY",
    output_dir: Path = Path("./rate_design/ny/hp_rates/data/buildstock_raw"),
    max_workers: int = 5,
    file_type: tuple[str, ...] = ("metadata", "load_curve_hourly"),
) -> tuple[list[Path], list[str]]:
    """Download data for an explicit list of buildings from one upgrade.

    Use this when the set of buildings is already known (e.g., a subset
    selected for upgrade adoption) and no ID lookup or sampling is needed.

    Args:
        building_ids: Building IDs to fetch.
        upgrade_id: Upgrade ID to fetch (e.g., "0" for baseline).
        release_year: ResStock release year (e.g., "2024").
        weather_file: Weather file type (e.g., "tmy3").
        release_version: Release version number (e.g., "2").
        state: State abbreviation (e.g., "NY").
        output_dir: Directory to save downloaded files; created if missing.
        max_workers: Number of parallel download workers.
        file_type: File types to fetch.

    Returns:
        Tuple of (downloaded_paths, failed_building_ids).
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Fields shared by every BuildingID; only bldg_id varies per building.
    shared_fields = {
        "product": "resstock",
        "release_year": release_year,
        "weather_file": weather_file,
        "release_version": release_version,
        "state": state,
        "upgrade_id": upgrade_id,
    }
    targets = [BuildingID(bldg_id=raw_id, **shared_fields) for raw_id in building_ids]

    downloaded, failures = fetch_bldg_data(
        bldg_ids=targets,
        file_type=file_type,
        output_dir=output_dir,
        max_workers=max_workers,
    )
    return downloaded, failures

0 commit comments

Comments
 (0)