3636import sys
3737from gridfm_datakit .network import Network
3838from gridfm_datakit .process .process_network import init_julia
39+ from gridfm_datakit .utils .random_seed import custom_seed
3940
4041
4142def _setup_environment (
4243 config : Union [str , Dict [str , Any ], NestedNamespace ],
43- ) -> Tuple [NestedNamespace , str , Dict [str , str ]]:
44+ ) -> Tuple [NestedNamespace , str , Dict [str , str ], int ]:
4445 """Setup the environment for data generation.
4546
4647 Args:
@@ -50,7 +51,7 @@ def _setup_environment(
5051 3. NestedNamespace object (NestedNamespace)
5152
5253 Returns:
53- Tuple of (args, base_path, file_paths)
54+ Tuple of (args, base_path, file_paths, seed )
5455 """
5556 # Load config from file if a path is provided
5657 if isinstance (config , str ):
@@ -63,6 +64,25 @@ def _setup_environment(
6364 else :
6465 args = config
6566
67+ # Set global seed if provided, otherwise generate a unique seed for this generation
68+ if (
69+ hasattr (args .settings , "seed" )
70+ and args .settings .seed is not None
71+ and args .settings .seed != ""
72+ ):
73+ seed = args .settings .seed
74+ print (f"Global random seed set to: { seed } " )
75+
76+ else :
77+ # No seed configured: draw a random seed so this run is not reproducible by default.
78+ # Scenarios stay i.i.d. within a run; separate runs will usually (not always) get different seeds.
79+ import secrets
80+
81+ seed = secrets .randbelow (50_000 )
82+ # The upper bound keeps chunk_seed = seed * 20000 + start_idx + 1 below 2^31 - 1:
83+ # seed < (2,147,483,647 - n_scenarios) / 20,000 ~= 100_000, so 50_000 is used to leave a safety margin.
84+ print (f"No seed provided. Using seed={ seed } " )
85+
6686 # Setup output directory
6787 base_path = os .path .join (args .settings .data_dir , args .network .name , "raw" )
6888 if os .path .exists (base_path ) and args .settings .overwrite :
@@ -115,18 +135,20 @@ def _setup_environment(
115135 if log_file == file_paths ["args_log" ]:
116136 yaml .safe_dump (args .to_dict (), f )
117137
118- return args , base_path , file_paths
138+ return args , base_path , file_paths , seed
119139
120140
121141def _prepare_network_and_scenarios (
122142 args : NestedNamespace ,
123143 file_paths : Dict [str , str ],
144+ seed : int ,
124145) -> Tuple [Network , np .ndarray ]:
125146 """Prepare the network and generate load scenarios.
126147
127148 Args:
128149 args: Configuration object
129150 file_paths: Dictionary of file paths
151+ seed: Global random seed for reproducibility.
130152
131153 Returns:
132154 Tuple of (network, scenarios)
@@ -147,6 +169,7 @@ def _prepare_network_and_scenarios(
147169 args .load .scenarios ,
148170 file_paths ["scenarios_log" ],
149171 max_iter = args .settings .max_iter ,
172+ seed = seed ,
150173 )
151174 scenarios_df = load_scenarios_to_df (scenarios )
152175 scenarios_df .to_parquet (file_paths ["scenarios" ], index = False , engine = "pyarrow" )
@@ -230,10 +253,10 @@ def generate_power_flow_data(
230253 """
231254
232255 # Setup environment
233- args , base_path , file_paths = _setup_environment (config )
256+ args , base_path , file_paths , seed = _setup_environment (config )
234257
235258 # Prepare network and scenarios
236- net , scenarios = _prepare_network_and_scenarios (args , file_paths )
259+ net , scenarios = _prepare_network_and_scenarios (args , file_paths , seed )
237260
238261 # Initialize topology generator
239262 topology_generator = initialize_topology_generator (args .topology_perturbation , net )
@@ -254,48 +277,50 @@ def generate_power_flow_data(
254277
255278 processed_data = []
256279
257- # Process scenarios sequentially
258- with open (file_paths ["tqdm_log" ], "a" ) as f :
259- with tqdm (
260- total = args .load .scenarios ,
261- desc = "Processing scenarios" ,
262- file = Tee (sys .stdout , f ),
263- miniters = 5 ,
264- ) as pbar :
265- for scenario_index in range (args .load .scenarios ):
266- # Process the scenario
267- if args .settings .mode == "opf" :
268- processed_data = process_scenario_opf_mode (
269- net ,
270- scenarios ,
271- scenario_index ,
272- topology_generator ,
273- generation_generator ,
274- admittance_generator ,
275- processed_data ,
276- file_paths ["error_log" ],
277- args .settings .include_dc_res ,
278- jl ,
279- )
280- elif args .settings .mode == "pf" :
281- processed_data = process_scenario_pf_mode (
282- net ,
283- scenarios ,
284- scenario_index ,
285- topology_generator ,
286- generation_generator ,
287- admittance_generator ,
288- processed_data ,
289- file_paths ["error_log" ],
290- args .settings .include_dc_res ,
291- args .settings .pf_fast ,
292- args .settings .dcpf_fast ,
293- jl ,
294- )
295- else :
296- raise ValueError ("Invalid mode!" )
297-
298- pbar .update (1 )
280+ # Process scenarios sequentially with deterministic seed
281+ # Use custom_seed to control randomness for reproducibility
282+ with custom_seed (seed + 1 ):
283+ with open (file_paths ["tqdm_log" ], "a" ) as f :
284+ with tqdm (
285+ total = args .load .scenarios ,
286+ desc = "Processing scenarios" ,
287+ file = Tee (sys .stdout , f ),
288+ miniters = 5 ,
289+ ) as pbar :
290+ for scenario_index in range (args .load .scenarios ):
291+ # Process the scenario
292+ if args .settings .mode == "opf" :
293+ processed_data = process_scenario_opf_mode (
294+ net ,
295+ scenarios ,
296+ scenario_index ,
297+ topology_generator ,
298+ generation_generator ,
299+ admittance_generator ,
300+ processed_data ,
301+ file_paths ["error_log" ],
302+ args .settings .include_dc_res ,
303+ jl ,
304+ )
305+ elif args .settings .mode == "pf" :
306+ processed_data = process_scenario_pf_mode (
307+ net ,
308+ scenarios ,
309+ scenario_index ,
310+ topology_generator ,
311+ generation_generator ,
312+ admittance_generator ,
313+ processed_data ,
314+ file_paths ["error_log" ],
315+ args .settings .include_dc_res ,
316+ args .settings .pf_fast ,
317+ args .settings .dcpf_fast ,
318+ jl ,
319+ )
320+ else :
321+ raise ValueError ("Invalid mode!" )
322+
323+ pbar .update (1 )
299324
300325 # Save final data
301326 _save_generated_data (
@@ -339,14 +364,14 @@ def generate_power_flow_data_distributed(
339364 - scenarios_{generator}.log: Load scenario generation notes
340365 """
341366 # Setup environment
342- args , base_path , file_paths = _setup_environment (config )
367+ args , base_path , file_paths , seed = _setup_environment (config )
343368
344369 # check if mode is valid
345370 if args .settings .mode not in ["opf" , "pf" ]:
346371 raise ValueError ("Invalid mode!" )
347372
348373 # Prepare network and scenarios
349- net , scenarios = _prepare_network_and_scenarios (args , file_paths )
374+ net , scenarios = _prepare_network_and_scenarios (args , file_paths , seed )
350375
351376 # Initialize topology generator
352377 topology_generator = initialize_topology_generator (args .topology_perturbation , net )
@@ -405,6 +430,7 @@ def generate_power_flow_data_distributed(
405430 args .settings .dcpf_fast ,
406431 file_paths ["solver_log_dir" ],
407432 args .settings .max_iter ,
433+ seed ,
408434 )
409435 for chunk in scenario_chunks
410436 ]
0 commit comments