Skip to content

Commit e259c4b

Browse files
authored
Merge pull request #35 from gridfm/pm_correction_and_seed_fix
fix of seed + gen cost permutation + pv to pq
2 parents 1d7cd24 + 9aa34ba commit e259c4b

20 files changed

+783
-159
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,8 @@ settings:
187187
pf_fast: true # Whether to use fast PF solver by default (compute_ac_pf from PowerModels.jl); if false, uses Ipopt-based PF. Some networks (typically large ones e.g. case10000_goc) do not work with pf_fast: true. pf_fast is faster and more accurate than the Ipopt-based PF.
188188
dcpf_fast: true # Whether to use fast DCPF solver by default (compute_dc_pf from PowerModels.jl)
189189
max_iter: 200 # Max iterations for Ipopt-based solvers
190+
seed: null # Seed for random number generation. If null, a random seed is generated (RECOMMENDED). To reproduce the same data across runs, set the seed; ALL OTHER PARAMETERS IN THE CONFIG FILE MUST ALSO BE THE SAME.
191+
190192
```
191193

192194
<br>

gridfm_datakit/generate.py

Lines changed: 75 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,12 @@
3636
import sys
3737
from gridfm_datakit.network import Network
3838
from gridfm_datakit.process.process_network import init_julia
39+
from gridfm_datakit.utils.random_seed import custom_seed
3940

4041

4142
def _setup_environment(
4243
config: Union[str, Dict[str, Any], NestedNamespace],
43-
) -> Tuple[NestedNamespace, str, Dict[str, str]]:
44+
) -> Tuple[NestedNamespace, str, Dict[str, str], int]:
4445
"""Setup the environment for data generation.
4546
4647
Args:
@@ -50,7 +51,7 @@ def _setup_environment(
5051
3. NestedNamespace object (NestedNamespace)
5152
5253
Returns:
53-
Tuple of (args, base_path, file_paths)
54+
Tuple of (args, base_path, file_paths, seed)
5455
"""
5556
# Load config from file if a path is provided
5657
if isinstance(config, str):
@@ -63,6 +64,25 @@ def _setup_environment(
6364
else:
6465
args = config
6566

67+
# Set global seed if provided, otherwise generate a unique seed for this generation
68+
if (
69+
hasattr(args.settings, "seed")
70+
and args.settings.seed is not None
71+
and args.settings.seed != ""
72+
):
73+
seed = args.settings.seed
74+
print(f"Global random seed set to: {seed}")
75+
76+
else:
77+
# Generate a unique seed for non-reproducible but independent scenarios
78+
# This ensures scenarios are i.i.d. within a run, but different across runs
79+
import secrets
80+
81+
seed = secrets.randbelow(50_000)
82+
# Overflow guard: chunk_seed = seed * 20000 + start_idx + 1 must stay below 2^31 - 1,
83+
# i.e. seed < (2,147,483,647 - n_scenarios) / 20,000 ~= 100_000; 50_000 leaves a safety margin
84+
print(f"No seed provided. Using seed={seed}")
85+
6686
# Setup output directory
6787
base_path = os.path.join(args.settings.data_dir, args.network.name, "raw")
6888
if os.path.exists(base_path) and args.settings.overwrite:
@@ -115,18 +135,20 @@ def _setup_environment(
115135
if log_file == file_paths["args_log"]:
116136
yaml.safe_dump(args.to_dict(), f)
117137

118-
return args, base_path, file_paths
138+
return args, base_path, file_paths, seed
119139

120140

121141
def _prepare_network_and_scenarios(
122142
args: NestedNamespace,
123143
file_paths: Dict[str, str],
144+
seed: int,
124145
) -> Tuple[Network, np.ndarray]:
125146
"""Prepare the network and generate load scenarios.
126147
127148
Args:
128149
args: Configuration object
129150
file_paths: Dictionary of file paths
151+
seed: Global random seed for reproducibility.
130152
131153
Returns:
132154
Tuple of (network, scenarios)
@@ -147,6 +169,7 @@ def _prepare_network_and_scenarios(
147169
args.load.scenarios,
148170
file_paths["scenarios_log"],
149171
max_iter=args.settings.max_iter,
172+
seed=seed,
150173
)
151174
scenarios_df = load_scenarios_to_df(scenarios)
152175
scenarios_df.to_parquet(file_paths["scenarios"], index=False, engine="pyarrow")
@@ -230,10 +253,10 @@ def generate_power_flow_data(
230253
"""
231254

232255
# Setup environment
233-
args, base_path, file_paths = _setup_environment(config)
256+
args, base_path, file_paths, seed = _setup_environment(config)
234257

235258
# Prepare network and scenarios
236-
net, scenarios = _prepare_network_and_scenarios(args, file_paths)
259+
net, scenarios = _prepare_network_and_scenarios(args, file_paths, seed)
237260

238261
# Initialize topology generator
239262
topology_generator = initialize_topology_generator(args.topology_perturbation, net)
@@ -254,48 +277,50 @@ def generate_power_flow_data(
254277

255278
processed_data = []
256279

257-
# Process scenarios sequentially
258-
with open(file_paths["tqdm_log"], "a") as f:
259-
with tqdm(
260-
total=args.load.scenarios,
261-
desc="Processing scenarios",
262-
file=Tee(sys.stdout, f),
263-
miniters=5,
264-
) as pbar:
265-
for scenario_index in range(args.load.scenarios):
266-
# Process the scenario
267-
if args.settings.mode == "opf":
268-
processed_data = process_scenario_opf_mode(
269-
net,
270-
scenarios,
271-
scenario_index,
272-
topology_generator,
273-
generation_generator,
274-
admittance_generator,
275-
processed_data,
276-
file_paths["error_log"],
277-
args.settings.include_dc_res,
278-
jl,
279-
)
280-
elif args.settings.mode == "pf":
281-
processed_data = process_scenario_pf_mode(
282-
net,
283-
scenarios,
284-
scenario_index,
285-
topology_generator,
286-
generation_generator,
287-
admittance_generator,
288-
processed_data,
289-
file_paths["error_log"],
290-
args.settings.include_dc_res,
291-
args.settings.pf_fast,
292-
args.settings.dcpf_fast,
293-
jl,
294-
)
295-
else:
296-
raise ValueError("Invalid mode!")
297-
298-
pbar.update(1)
280+
# Process scenarios sequentially with deterministic seed
281+
# Use custom_seed to control randomness for reproducibility
282+
with custom_seed(seed + 1):
283+
with open(file_paths["tqdm_log"], "a") as f:
284+
with tqdm(
285+
total=args.load.scenarios,
286+
desc="Processing scenarios",
287+
file=Tee(sys.stdout, f),
288+
miniters=5,
289+
) as pbar:
290+
for scenario_index in range(args.load.scenarios):
291+
# Process the scenario
292+
if args.settings.mode == "opf":
293+
processed_data = process_scenario_opf_mode(
294+
net,
295+
scenarios,
296+
scenario_index,
297+
topology_generator,
298+
generation_generator,
299+
admittance_generator,
300+
processed_data,
301+
file_paths["error_log"],
302+
args.settings.include_dc_res,
303+
jl,
304+
)
305+
elif args.settings.mode == "pf":
306+
processed_data = process_scenario_pf_mode(
307+
net,
308+
scenarios,
309+
scenario_index,
310+
topology_generator,
311+
generation_generator,
312+
admittance_generator,
313+
processed_data,
314+
file_paths["error_log"],
315+
args.settings.include_dc_res,
316+
args.settings.pf_fast,
317+
args.settings.dcpf_fast,
318+
jl,
319+
)
320+
else:
321+
raise ValueError("Invalid mode!")
322+
323+
pbar.update(1)
299324

300325
# Save final data
301326
_save_generated_data(
@@ -339,14 +364,14 @@ def generate_power_flow_data_distributed(
339364
- scenarios_{generator}.log: Load scenario generation notes
340365
"""
341366
# Setup environment
342-
args, base_path, file_paths = _setup_environment(config)
367+
args, base_path, file_paths, seed = _setup_environment(config)
343368

344369
# check if mode is valid
345370
if args.settings.mode not in ["opf", "pf"]:
346371
raise ValueError("Invalid mode!")
347372

348373
# Prepare network and scenarios
349-
net, scenarios = _prepare_network_and_scenarios(args, file_paths)
374+
net, scenarios = _prepare_network_and_scenarios(args, file_paths, seed)
350375

351376
# Initialize topology generator
352377
topology_generator = initialize_topology_generator(args.topology_perturbation, net)
@@ -405,6 +430,7 @@ def generate_power_flow_data_distributed(
405430
args.settings.dcpf_fast,
406431
file_paths["solver_log_dir"],
407432
args.settings.max_iter,
433+
seed,
408434
)
409435
for chunk in scenario_chunks
410436
]

gridfm_datakit/interactive.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ def create_config() -> Dict[str, Any]:
9797
"dcpf_fast": dcpf_fast.value,
9898
"enable_solver_logs": enable_solver_logs.value,
9999
"max_iter": max_iter.value,
100+
"seed": None, # seed is not used in the interactive interface
100101
},
101102
}
102103
return config

gridfm_datakit/network.py

Lines changed: 87 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
"""
77

88
import os
9+
import shutil
910
import requests
1011
from importlib import resources
1112
import pandas as pd
@@ -22,6 +23,8 @@
2223
VM,
2324
VA,
2425
REF,
26+
PV,
27+
PQ,
2528
)
2629
from gridfm_datakit.utils.idx_gen import GEN_BUS, GEN_STATUS, PG, QG
2730
from gridfm_datakit.utils.idx_brch import (
@@ -36,12 +39,71 @@
3639
BR_R_ASYM,
3740
BR_X_ASYM,
3841
)
39-
from gridfm_datakit.utils.idx_cost import NCOST
42+
from gridfm_datakit.utils.idx_cost import NCOST, MODEL, POLYNOMIAL
4043
import warnings
4144
import networkx as nx
4245
import numpy as np
4346
import copy
4447
from typing import Dict, Tuple, Any
48+
import tempfile
49+
from juliapkg.state import STATE
50+
from juliapkg.deps import run_julia, executable
51+
52+
53+
def correct_network(network_path: str, force: bool = False) -> str:
    """
    Round-trip a MATPOWER case through PowerModels and cache the result.

    The case is parsed with ``PowerModels.parse_file`` and written back with
    ``PowerModels.export_matpower`` inside a Julia process started through
    ``run_julia``. The output is stored next to the input as
    ``<name>_corrected<ext>`` and is reused on later calls unless ``force``
    is set.

    Args:
        network_path: Path to the original MATPOWER .m file.
        force: If True, regenerate the corrected file even if it exists.

    Returns:
        Path to the corrected network file.

    Raises:
        FileNotFoundError: If input file does not exist.
        RuntimeError: If Julia finishes without writing a non-empty file.
            Exceptions raised by ``run_julia`` itself propagate unchanged.
    """
    if not os.path.exists(network_path):
        raise FileNotFoundError(f"Network file not found: {network_path}")

    base_path, ext = os.path.splitext(network_path)
    corrected_path = f"{base_path}_corrected{ext}"

    if os.path.exists(corrected_path) and not force:
        return corrected_path

    # Stage the output in the destination directory: a rename within one
    # filesystem is atomic, so a concurrent reader never sees a partial file.
    target_dir = os.path.dirname(os.path.abspath(corrected_path))
    tmp_fd, tmp_path = tempfile.mkstemp(suffix=".m", dir=target_dir)
    os.close(tmp_fd)

    try:
        project = STATE["project"]
        jl_exe = executable()

        # Julia script as a list of lines; paths are escaped so backslashes,
        # quotes and `$` in a path cannot alter the generated code.
        julia_code = [
            "using PowerModels",
            f"data = PowerModels.parse_file({_julia_string_literal(network_path)})",
            f"PowerModels.export_matpower({_julia_string_literal(tmp_path)}, data)",
        ]

        # Run Julia
        run_julia(julia_code, project=project, executable=jl_exe)

        # Sanity check: mkstemp created an empty file, so size 0 means Julia
        # never wrote anything.
        if not os.path.exists(tmp_path) or os.path.getsize(tmp_path) == 0:
            raise RuntimeError("Julia produced empty MATPOWER file")

        # Atomically publish the result (also overwrites an existing target
        # on every platform, which matters when force=True).
        os.replace(tmp_path, corrected_path)
        return corrected_path

    finally:
        # Only present here if something failed before the rename.
        if os.path.exists(tmp_path):
            os.unlink(tmp_path)


def _julia_string_literal(value: str) -> str:
    """Return ``value`` as a double-quoted Julia string literal with ``\\``, ``"`` and ``$`` escaped."""
    escaped = value.replace("\\", "\\\\").replace('"', '\\"').replace("$", "\\$")
    return f'"{escaped}"'
45107

46108

47109
def numpy_to_matlab_matrix(array: np.ndarray, name: str) -> str:
@@ -132,6 +194,11 @@ def __init__(self, mpc: Dict[str, Any]) -> None:
132194
assert np.all(np.isin(self.gens[:, GEN_BUS], self.buses[:, BUS_I])), (
133195
"All generator buses should be in bus IDs"
134196
)
197+
198+
assert np.all(self.gencosts[:, MODEL] == POLYNOMIAL), (
199+
"MODEL should be POLYNOMIAL"
200+
)
201+
135202
# assert all generators have the same number of cost coefficients
136203
assert np.all(self.gencosts[:, NCOST] == self.gencosts[:, NCOST][0]), (
137204
"All generators must have the same number of cost coefficients"
@@ -345,6 +412,21 @@ def deactivate_gens(self, idx_gens: np.ndarray) -> None:
345412
)
346413
self.gens[idx_gens, GEN_STATUS] = 0
347414

415+
# -----------------------------
416+
# Update PV buses that lost all generators → PQ
417+
# -----------------------------
418+
n_buses = self.buses.shape[0]
419+
420+
# Count in-service generators per bus
421+
gens_on = self.gens[self.idx_gens_in_service]
422+
gen_count = np.bincount(gens_on[:, GEN_BUS].astype(int), minlength=n_buses)
423+
424+
# Boolean mask: PV buses with no in-service generator
425+
pv_no_gen = (self.buses[:, BUS_TYPE] == PV) & (gen_count == 0)
426+
427+
# Set them to PQ
428+
self.buses[pv_no_gen, BUS_TYPE] = PQ
429+
348430
def check_single_connected_component(self) -> bool:
349431
"""
350432
Check that the network forms a single connected component.
@@ -541,6 +623,7 @@ def load_net_from_file(network_path: str) -> Network:
541623
ValueError: If the file format is invalid.
542624
"""
543625
# Load network using matpowercaseframes
626+
network_path = correct_network(network_path)
544627
mpc_frames = CaseFrames(network_path)
545628
mpc = {
546629
key: mpc_frames.__getattribute__(key)
@@ -569,6 +652,7 @@ def load_net_from_pglib(grid_name: str) -> Network:
569652
FileNotFoundError: If the file cannot be found after download.
570653
ValueError: If the file format is invalid.
571654
"""
655+
572656
# Construct file paths
573657
file_path = str(
574658
resources.files("gridfm_datakit.grids").joinpath(f"pglib_opf_{grid_name}.m"),
@@ -586,6 +670,8 @@ def load_net_from_pglib(grid_name: str) -> Network:
586670
with open(file_path, "wb") as f:
587671
f.write(response.content)
588672

673+
file_path = correct_network(file_path)
674+
589675
# Load network using matpowercaseframes
590676
mpc_frames = CaseFrames(file_path)
591677
mpc = {

0 commit comments

Comments
 (0)