33
44from pathlib import Path
55
6- from utils .buildstock_io import get_load_curve_dir , get_metadata_path
7- from utils .resstock_cumulative_adoption import (
8- build_cohort_table ,
9- build_multiple_adoption_scenarios ,
10- fetch_baseline_and_upgrade_data ,
6+ from utils .mixed_adoption_trajectory import (
7+ build_adoption_trajectory ,
8+ fetch_baseline_sample ,
119)
1210
1311# Base data directory for NY HP rates (git-ignored raw/processed, configs versioned)
2523 # Download settings
2624 "output_dir" : BASE_DATA_DIR / "buildstock_raw" ,
2725 "max_workers" : 5 ,
28- # Sampling settings (set to None to use all buildings)
29- "sample_size" : 1000 , # Number of buildings to sample (None = all buildings)
30- "sample_seed" : 123 , # Seed for sampling reproducibility
26+ # Sampling settings
27+ "sample_size" : 1000 , # Number of buildings to sample
28+ "sample_seed" : 123 , # Seed for sampling reproducibility (determines building ordering)
3129 # Adoption scenario settings
3230 "adoption_fractions" : [0.1 , 0.2 , 0.3 , 0.5 , 0.8 , 1.0 ],
33- "random_seed" : 42 , # For reproducibility
3431 # Output settings
3532 "processed_dir" : BASE_DATA_DIR / "buildstock_processed" ,
36- "cohort_table_path" : BASE_DATA_DIR / "buildstock_processed" / "cohort_table.parquet" ,
3733}
3834
3935
@@ -47,119 +43,66 @@ def main():
4743 print (f" { key } : { value } " )
4844 print ("\n " )
4945
50- # Step 1: Fetch baseline and upgrade data
46+ # Step 1: Fetch baseline sample and establish building ID ordering
5147 print ("\n " + "=" * 80 )
52- print ("STEP 1: Fetching baseline and heat pump upgrade data " )
48+ print ("STEP 1: Fetching baseline sample " )
5349 print ("=" * 80 )
50+ print (f"Fetching { CONFIG ['sample_size' ]} baseline buildings (seed={ CONFIG ['sample_seed' ]} )" )
5451
55- baseline_paths , hp_paths , failed = fetch_baseline_and_upgrade_data (
52+ baseline_metadata_path , building_ids = fetch_baseline_sample (
53+ sample_size = CONFIG ["sample_size" ],
54+ random_seed = CONFIG ["sample_seed" ],
5655 release_year = CONFIG ["release_year" ],
5756 weather_file = CONFIG ["weather_file" ],
5857 release_version = CONFIG ["release_version" ],
5958 state = CONFIG ["state" ],
60- hp_upgrade_id = CONFIG ["hp_upgrade_id" ],
6159 output_dir = CONFIG ["output_dir" ],
6260 max_workers = CONFIG ["max_workers" ],
63- sample_size = CONFIG ["sample_size" ],
64- random_seed = CONFIG ["sample_seed" ],
6561 )
6662
67- if failed :
68- print (f"\n ⚠️ Warning: { len (failed )} files failed to download:" )
69- for f in failed [:5 ]: # Show first 5
70- print (f" - { f } " )
71- if len (failed ) > 5 :
72- print (f" ... and { len (failed ) - 5 } more" )
73-
74- print (f"\n ✓ Downloaded { len (baseline_paths )} baseline files" )
75- print (f"✓ Downloaded { len (hp_paths )} HP upgrade files" )
63+ print (f"\n ✓ Fetched { len (building_ids )} baseline buildings" )
64+ print (f"✓ Baseline metadata: { baseline_metadata_path } " )
65+ print (f"✓ Building ID ordering established (deterministic from seed)" )
7666
77- # Step 2: Build cohort table
67+ # Step 2: Build adoption trajectory
7868 print ("\n " + "=" * 80 )
79- print ("STEP 2: Building cohort table " )
69+ print ("STEP 2: Building adoption trajectory " )
8070 print ("=" * 80 )
71+ print (f"Creating scenarios for adoption fractions: { CONFIG ['adoption_fractions' ]} " )
72+ print ("Note: Upgrade data will be fetched incrementally for each fraction" )
8173
82- # Get metadata paths
83- baseline_metadata = get_metadata_path (
84- output_dir = CONFIG ["output_dir" ],
85- release_year = CONFIG ["release_year" ],
86- weather_file = CONFIG ["weather_file" ],
87- release_version = CONFIG ["release_version" ],
88- upgrade_id = "0" ,
89- )
90-
91- hp_metadata = get_metadata_path (
92- output_dir = CONFIG ["output_dir" ],
93- release_year = CONFIG ["release_year" ],
94- weather_file = CONFIG ["weather_file" ],
95- release_version = CONFIG ["release_version" ],
74+ scenario_paths = build_adoption_trajectory (
75+ baseline_metadata_path = baseline_metadata_path ,
76+ baseline_building_ids = building_ids ,
77+ adoption_fractions = CONFIG ["adoption_fractions" ],
9678 upgrade_id = CONFIG ["hp_upgrade_id" ],
97- )
98-
99- # Get load curve directories
100- baseline_load_dir = get_load_curve_dir (
101- output_dir = CONFIG ["output_dir" ],
10279 release_year = CONFIG ["release_year" ],
10380 weather_file = CONFIG ["weather_file" ],
10481 release_version = CONFIG ["release_version" ],
82+ state = CONFIG ["state" ],
83+ output_dir = CONFIG ["output_dir" ],
84+ max_workers = CONFIG ["max_workers" ],
85+ output_processed_dir = CONFIG ["processed_dir" ],
10586 )
10687
107- hp_load_dir = baseline_load_dir # Same directory, different upgrade subdirs
108-
109- print (f"Baseline metadata: { baseline_metadata } " )
110- print (f"HP metadata: { hp_metadata } " )
111- print (f"Load curve directory: { baseline_load_dir } " )
112-
113- cohort_df = build_cohort_table (
114- baseline_metadata_path = baseline_metadata ,
115- hp_metadata_path = hp_metadata ,
116- baseline_load_dir = baseline_load_dir ,
117- hp_load_dir = hp_load_dir ,
118- output_path = CONFIG ["cohort_table_path" ],
119- )
120-
121- print (f"\n ✓ Built cohort table with { len (cohort_df )} buildings" )
122- print (f"✓ Saved to { CONFIG ['cohort_table_path' ]} " )
123-
124- # Show sample of cohort table
125- print ("\n Cohort table sample:" )
126- print (cohort_df .head (3 ))
127-
128- # Show heat pump type distribution if available
129- if "hp_type" in cohort_df .columns :
130- print ("\n Heat pump type distribution:" )
131- hp_type_counts = cohort_df .group_by ("hp_type" ).count ().sort ("count" , descending = True )
132- for row in hp_type_counts .iter_rows (named = True ):
133- print (f" { row ['hp_type' ]} : { row ['count' ]} buildings" )
134-
135- # Step 3: Build multiple adoption scenarios
136- print ("\n " + "=" * 80 )
137- print ("STEP 3: Building adoption scenarios" )
138- print ("=" * 80 )
139-
140- scenario_paths = build_multiple_adoption_scenarios (
141- cohort_df = cohort_df ,
142- adoption_fractions = CONFIG ["adoption_fractions" ],
143- seed = CONFIG ["random_seed" ],
144- output_dir = CONFIG ["processed_dir" ],
145- include_metadata = True ,
146- )
147-
88+ # Summary
14889 print ("\n " + "=" * 80 )
14990 print ("COMPLETE - Scenario Summary" )
15091 print ("=" * 80 )
15192 print (f"\n Generated { len (scenario_paths )} adoption scenarios:" )
15293 for fraction , path in sorted (scenario_paths .items ()):
153- print (f" { fraction * 100 :3.0f} % adoption → { path } " )
94+ n_adopters = int (round (fraction * len (building_ids )))
95+ print (f" { fraction * 100 :3.0f} % adoption ({ n_adopters :4d} buildings) → { path .name } " )
15496
155- print (f"\n All scenarios use the same random seed ( { CONFIG ['random_seed ' ]} ) ensuring:" )
97+ print (f"\n All scenarios use seed { CONFIG ['sample_seed ' ]} ensuring:" )
15698 print (" - Reproducibility: Re-running with same seed gives identical results" )
15799 print (" - Cumulative property: Adopters at X% ⊆ Adopters at Y% for X < Y" )
100+ print (" - Efficiency: Upgrade data fetched only for buildings that adopt" )
158101
159102 print ("\n Next steps:" )
160103 print (" - Load scenarios with: pl.read_parquet(path)" )
104+ print (" - Check 'adopted' column (0=baseline, 1=upgrade)" )
161105 print (" - Use for GenX/CAIRO modeling" )
162- print (" - Analyze adoption trajectories" )
163106 print ("\n ✓ Done!" )
164107
165108
0 commit comments