@@ -45,9 +45,30 @@ def list_runs(self, config: BenchmarkConfig) -> Iterable[RunJournal]:
def get_run_plan(
    self,
    config: BenchmarkConfig,
    tests: Sequence[str],
    execution_mode: str = "remote",
):
    """Return the run plan produced by the run service for ``tests``.

    ``tests`` is copied into a fresh list before being handed to the
    service, and ``execution_mode`` is forwarded as a keyword argument.
    """
    build_plan = self._run_service.get_run_plan
    selected_tests = list(tests)
    return build_plan(config, selected_tests, execution_mode=execution_mode)
4747
48- def _provision (self , config : BenchmarkConfig , execution_mode : str , node_count : int , docker_engine : str | None = None ):
48+ def _provision (
49+ self ,
50+ config : BenchmarkConfig ,
51+ execution_mode : str ,
52+ node_count : int ,
53+ * ,
54+ docker_engine : str | None = None ,
55+ resume : str | None = None ,
56+ ):
4957 """Provision nodes according to execution mode; returns updated config and provisioner result."""
5058 mode = ProvisioningMode (execution_mode )
59+ node_names = None
60+ if resume and mode in (ProvisioningMode .DOCKER , ProvisioningMode .MULTIPASS ):
61+ node_names = self ._resume_node_names (config , resume )
62+ if not node_names :
63+ raise ProvisioningError (
64+ "Unable to determine previous container/VM names for resume; "
65+ "ensure the run journal or host directories are available."
66+ )
67+ if node_count != len (node_names ):
68+ raise ProvisioningError (
69+ "Resume node count does not match original run; "
70+ "use --nodes to match the previous run."
71+ )
5172 if mode is ProvisioningMode .REMOTE :
5273 request = ProvisioningRequest (
5374 mode = ProvisioningMode .REMOTE ,
@@ -58,13 +79,15 @@ def _provision(self, config: BenchmarkConfig, execution_mode: str, node_count: i
5879 request = ProvisioningRequest (
5980 mode = ProvisioningMode .DOCKER ,
6081 count = node_count ,
82+ node_names = node_names ,
6183 docker_engine = docker_engine or "docker" ,
6284 )
6385 else :
6486 temp_dir = config .output_dir .parent / "temp_keys"
6587 request = ProvisioningRequest (
6688 mode = ProvisioningMode .MULTIPASS ,
6789 count = node_count ,
90+ node_names = node_names ,
6891 state_dir = temp_dir ,
6992 )
7093 result = self ._provisioner .provision (request )
@@ -83,6 +106,49 @@ def _provision(self, config: BenchmarkConfig, execution_mode: str, node_count: i
83106 config .remote_execution .enabled = True
84107 return config , result
85108
@staticmethod
def _resume_node_names(config: BenchmarkConfig, resume: str) -> list[str] | None:
    """Recover the node (container/VM) names used by the run being resumed.

    The names are taken from the hosts recorded on the tasks of the run
    journal when that journal can be loaded. Otherwise the sub-directories
    of the run's root directory are used, skipping entries whose name
    starts with ``"_"``.

    Args:
        config: Benchmark configuration. Its ``output_dir`` anchors an
            explicit run id, and the object is passed to the journal
            lookup helpers when ``resume`` is ``"latest"``.
        resume: ``"latest"`` to look up the most recent run, otherwise the
            name of a run directory under ``config.output_dir``.

    Returns:
        A sorted list of unique node names, or ``None`` when neither the
        journal nor the run directory yields any name.
    """
    # Function-scope import kept from the original code.
    # NOTE(review): presumably avoids an import cycle - confirm.
    from lb_app.services.run_journal import (
        find_latest_journal,
        find_latest_results_run,
    )

    run_root = None
    journal_path = None
    if resume == "latest":
        journal_path = find_latest_journal(config)
        if journal_path is not None:
            run_root = journal_path.parent
        else:
            # NOTE(review): index 1 of the result is used as the journal
            # path; verify against find_latest_results_run's return shape.
            latest = find_latest_results_run(config)
            if latest:
                journal_path = latest[1]
                run_root = journal_path.parent
    else:
        run_root = config.output_dir / resume
        journal_path = run_root / "run_journal.json"

    # First choice: host names recorded in the journal.
    if journal_path is not None and journal_path.exists():
        try:
            journal = RunJournal.load(journal_path)
        except Exception:
            # Deliberate best-effort: an unreadable journal must not abort
            # the resume, the directory listing below is the fallback.
            journal = None
        if journal is not None:
            names = sorted(
                {task.host for task in journal.tasks.values() if task.host}
            )
            if names:
                return names

    # Fallback: sub-directories of the run root. is_dir() (rather than
    # exists()) keeps a path that resolves to a regular file from raising
    # NotADirectoryError in iterdir(); the caller then sees None.
    if run_root is not None and run_root.is_dir():
        names = sorted(
            entry.name
            for entry in run_root.iterdir()
            if entry.is_dir() and not entry.name.startswith("_")
        )
        return names or None
    return None
151+
86152 def start_run (self , request : RunRequest , hooks : UIHooks ) -> RunResult | None :
87153 cfg = request .config
88154 target_tests = list (
@@ -105,6 +171,7 @@ def start_run(self, request: RunRequest, hooks: UIHooks) -> RunResult | None:
105171 setup = request .setup ,
106172 stop_file = request .stop_file ,
107173 execution_mode = request .execution_mode ,
174+ node_count = request .node_count ,
108175 preloaded_config = cfg ,
109176 )
110177
@@ -115,7 +182,8 @@ def start_run(self, request: RunRequest, hooks: UIHooks) -> RunResult | None:
115182 cfg ,
116183 request .execution_mode ,
117184 request .node_count ,
118- request .docker_engine ,
185+ docker_engine = request .docker_engine ,
186+ resume = request .resume ,
119187 )
120188 except ProvisioningError as exc :
121189 hooks .on_warning (f"Provisioning failed: { exc } " , ttl = 5 )
0 commit comments