@@ -264,6 +264,12 @@ def create_parser(cls) -> argparse.ArgumentParser:
264264 default = None ,
265265 help = "Max parallel instances (auto if omitted)" ,
266266 )
267+ g_limits .add_argument (
268+ "--max-startup-parallel" ,
269+ type = int ,
270+ default = None ,
271+ help = "Max parallel startup initializations (default: min(10,max-parallel))" ,
272+ )
267273 g_limits .add_argument (
268274 "--timeout" , type = int , default = 3600 , help = "Timeout per instance (seconds)"
269275 )
@@ -844,6 +850,7 @@ def _add(field: str, reason: str, example: str = ""):
844850 _add ("import_conflict_policy" , "must be fail|overwrite|suffix" , "fail" )
845851 except Exception :
846852 pass
853+ # Allow 'auto' for both max_parallel_instances and max_parallel_startup
847854 try :
848855 mpi = full_config .get ("orchestration" , {}).get (
849856 "max_parallel_instances" , "auto"
@@ -864,6 +871,26 @@ def _add(field: str, reason: str, example: str = ""):
864871 "must be integer or 'auto'" ,
865872 "auto" ,
866873 )
874+ try :
875+ mps = full_config .get ("orchestration" , {}).get (
876+ "max_parallel_startup" , "auto"
877+ )
878+ if isinstance (mps , str ) and mps .lower () == "auto" :
879+ pass
880+ else :
881+ v2 = int (mps )
882+ if v2 <= 0 :
883+ _add (
884+ "orchestration.max_parallel_startup" ,
885+ "must be > 0 or 'auto'" ,
886+ "auto" ,
887+ )
888+ except Exception :
889+ _add (
890+ "orchestration.max_parallel_startup" ,
891+ "must be integer or 'auto'" ,
892+ "auto" ,
893+ )
867894 # Strategy exists
868895 try :
869896 strategy = full_config .get ("strategy" , args .strategy )
@@ -1598,6 +1625,10 @@ async def run_orchestrator(self, args: argparse.Namespace) -> int:
15981625 cli_config .setdefault ("orchestration" , {})[
15991626 "max_parallel_instances"
16001627 ] = args .max_parallel
1628+ if getattr (args , "max_startup_parallel" , None ):
1629+ cli_config .setdefault ("orchestration" , {})[
1630+ "max_parallel_startup"
1631+ ] = args .max_startup_parallel
16011632 if hasattr (args , "timeout" ) and args .timeout :
16021633 cli_config .setdefault ("runner" , {})["timeout" ] = args .timeout
16031634 if hasattr (args , "force_commit" ) and args .force_commit :
@@ -1800,6 +1831,9 @@ def _red(k, v):
18001831
18011832 # Get orchestration settings from merged config
18021833 max_parallel = full_config ["orchestration" ]["max_parallel_instances" ]
1834+ max_startup_parallel = full_config ["orchestration" ].get (
1835+ "max_parallel_startup" , "auto"
1836+ )
18031837 state_dir = full_config .get ("orchestration" , {}).get (
18041838 "state_dir"
18051839 ) or full_config .get ("state_dir" , Path ("./pitaya_state" ))
@@ -1820,17 +1854,38 @@ def _red(k, v):
18201854 # Respect global session volume consent by setting env for runner
18211855 allow_global_session = bool (getattr (args , "allow_global_session_volume" , False ))
18221856
1823- # Resolve max_parallel without host resource calculations
1824- try :
1825- if isinstance (max_parallel , str ):
1826- # Accept numeric strings; treat non-numeric (e.g., 'auto') as default 5
1857+ # Resolve parallelism
1858+ import os as _os
1859+
def _cpu_default() -> int:
    """Return the CPU-based default for total parallelism.

    Returns:
        The CPU count reported by ``os.cpu_count()``, but never less than 2.
        2 is also returned when the count cannot be determined (``None``).
    """
    # os.cpu_count() yields an int or None and does not raise, so neither an
    # int() cast nor a broad exception guard is needed around it.
    return max(2, _os.cpu_count() or 2)
1865+
1866+ # Total parallel: 'auto' (or an unrecognized type) -> CPU count, minimum 2
1867+ if isinstance (max_parallel , str ):
1868+ if max_parallel .lower () == "auto" :
1869+ max_parallel_val = _cpu_default ()
1870+ else :
18271871 max_parallel_val = int (max_parallel )
1828- elif isinstance (max_parallel , int ):
1829- max_parallel_val = max_parallel
1872+ elif isinstance (max_parallel , int ):
1873+ max_parallel_val = max (1 , max_parallel )
1874+ else :
1875+ max_parallel_val = _cpu_default ()
1876+
1877+ # Startup parallel: auto -> min(10, total)
1878+ if isinstance (max_startup_parallel , str ):
1879+ if max_startup_parallel .lower () == "auto" :
1880+ max_startup_parallel_val = min (10 , max_parallel_val )
18301881 else :
1831- max_parallel_val = 5
1832- except Exception :
1833- max_parallel_val = 5
1882+ max_startup_parallel_val = int (max_startup_parallel )
1883+ elif isinstance (max_startup_parallel , int ):
1884+ max_startup_parallel_val = max (1 , max_startup_parallel )
1885+ else :
1886+ max_startup_parallel_val = min (10 , max_parallel_val )
1887+ # Clamp startup parallelism so it never exceeds the total
1888+ max_startup_parallel_val = min (max_startup_parallel_val , max_parallel_val )
18341889
18351890 # Proxy automatic egress defaults removed
18361891
@@ -1851,6 +1906,7 @@ def _red(k, v):
18511906 # Create orchestrator
18521907 self .orchestrator = Orchestrator (
18531908 max_parallel_instances = max_parallel_val ,
1909+ max_parallel_startup = max_startup_parallel_val ,
18541910 state_dir = Path (state_dir ),
18551911 logs_dir = Path (logs_dir ),
18561912 container_limits = container_limits ,
0 commit comments