@@ -15,9 +15,14 @@ use std::io::Write;
1515use std:: os:: unix:: fs:: PermissionsExt as _;
1616use std:: os:: unix:: process:: CommandExt ;
1717use std:: process:: Command ;
18+ use std:: time:: { Duration , Instant } ;
1819use tempfile;
1920use tracing:: debug;
2021
22+ // SSH retry configuration
23+ const SSH_RETRY_TIMEOUT_SECS : u64 = 60 ; // Total time to retry SSH connections
24+ const SSH_POLL_DELAY_SECS : u64 = 2 ; // Delay between SSH attempts
25+
2126/// Configuration options for SSH connection to libvirt domain
2227#[ derive( Debug , Parser ) ]
2328pub struct LibvirtSshOpts {
@@ -36,7 +41,7 @@ pub struct LibvirtSshOpts {
3641 pub strict_host_keys : bool ,
3742
3843 /// SSH connection timeout in seconds
39- #[ clap( long, default_value = "30 " ) ]
44+ #[ clap( long, default_value = "5 " ) ]
4045 pub timeout : u32 ,
4146
4247 /// SSH log level
@@ -236,8 +241,39 @@ impl LibvirtSshOpts {
236241 Ok ( temp_key)
237242 }
238243
239- /// Execute SSH connection to domain
240- fn connect_ssh ( & self , ssh_config : & DomainSshConfig ) -> Result < ( ) > {
244+ /// Build SSH command with configured options
245+ fn build_ssh_command (
246+ & self ,
247+ ssh_config : & DomainSshConfig ,
248+ temp_key : & tempfile:: NamedTempFile ,
249+ parsed_extra_options : Vec < ( String , String ) > ,
250+ ) -> Command {
251+ let mut ssh_cmd = Command :: new ( "ssh" ) ;
252+ ssh_cmd
253+ . arg ( "-i" )
254+ . arg ( temp_key. path ( ) )
255+ . arg ( "-p" )
256+ . arg ( ssh_config. ssh_port . to_string ( ) ) ;
257+
258+ let common_opts = crate :: ssh:: CommonSshOptions {
259+ strict_host_keys : self . strict_host_keys ,
260+ connect_timeout : self . timeout ,
261+ server_alive_interval : 60 ,
262+ log_level : self . log_level . clone ( ) ,
263+ extra_options : parsed_extra_options,
264+ } ;
265+ common_opts. apply_to_command ( & mut ssh_cmd) ;
266+ ssh_cmd. arg ( format ! ( "{}@127.0.0.1" , self . user) ) ;
267+
268+ ssh_cmd
269+ }
270+
271+ /// Execute SSH connection to domain with retries
272+ fn connect_ssh (
273+ & self ,
274+ _global_opts : & crate :: libvirt:: LibvirtOptions ,
275+ ssh_config : & DomainSshConfig ,
276+ ) -> Result < ( ) > {
241277 debug ! (
242278 "Connecting to domain '{}' via SSH on port {} (user: {})" ,
243279 self . domain_name, ssh_config. ssh_port, self . user
@@ -250,17 +286,7 @@ impl LibvirtSshOpts {
250286 // Create temporary SSH key file
251287 let temp_key = self . create_temp_ssh_key ( ssh_config) ?;
252288
253- // Build SSH command
254- let mut ssh_cmd = Command :: new ( "ssh" ) ;
255-
256- // Add SSH key and port
257- ssh_cmd
258- . arg ( "-i" )
259- . arg ( temp_key. path ( ) )
260- . arg ( "-p" )
261- . arg ( ssh_config. ssh_port . to_string ( ) ) ;
262-
263- // Parse extra options from key=value format
289+ // Parse extra options
264290 let mut parsed_extra_options = Vec :: new ( ) ;
265291 for option in & self . extra_options {
266292 if let Some ( ( key, value) ) = option. split_once ( '=' ) {
@@ -273,76 +299,117 @@ impl LibvirtSshOpts {
273299 }
274300 }
275301
276- // Apply common SSH options
277- let common_opts = crate :: ssh:: CommonSshOptions {
278- strict_host_keys : self . strict_host_keys ,
279- connect_timeout : self . timeout ,
280- server_alive_interval : 60 ,
281- log_level : self . log_level . clone ( ) ,
282- extra_options : parsed_extra_options,
283- } ;
284- common_opts. apply_to_command ( & mut ssh_cmd) ;
302+ let start_time = Instant :: now ( ) ;
303+ let timeout = Duration :: from_secs ( SSH_RETRY_TIMEOUT_SECS ) ;
285304
286- // Target host
287- ssh_cmd. arg ( format ! ( "{}@127.0.0.1" , self . user) ) ;
305+ // For interactive SSH, test connectivity first with retries, then exec
306+ if self . command . is_empty ( ) {
307+ debug ! ( "Testing SSH connectivity before interactive session" ) ;
308+
309+ // Retry until timeout
310+ loop {
311+ let mut test_cmd =
312+ self . build_ssh_command ( ssh_config, & temp_key, parsed_extra_options. clone ( ) ) ;
313+ test_cmd. arg ( "--" ) . arg ( "true" ) ; // Simple test command
314+
315+ match test_cmd. output ( ) {
316+ Ok ( output) if output. status . success ( ) => {
317+ // SSH is ready, now exec for interactive session
318+ debug ! ( "SSH ready, launching interactive session" ) ;
319+ let mut ssh_cmd =
320+ self . build_ssh_command ( ssh_config, & temp_key, parsed_extra_options) ;
321+ let error = ssh_cmd. exec ( ) ;
322+ return Err ( eyre ! ( "Failed to exec SSH command: {}" , error) ) ;
323+ }
324+ Ok ( output) => {
325+ // Check if we've exceeded timeout
326+ if start_time. elapsed ( ) >= timeout {
327+ if !self . suppress_output {
328+ let stderr_str = String :: from_utf8_lossy ( & output. stderr ) ;
329+ eprint ! ( "{}" , stderr_str) ;
330+ eprintln ! (
331+ "\n SSH connection failed after {:.1}s. To see VM console output, run: virsh console {}" ,
332+ start_time. elapsed( ) . as_secs_f64( ) ,
333+ self . domain_name
334+ ) ;
335+ }
336+ return Err ( eyre ! ( "SSH connection failed after timeout" ) ) ;
337+ }
338+
339+ // Retry all errors - show message on first attempt
340+ if start_time. elapsed ( ) . as_secs ( ) < SSH_POLL_DELAY_SECS
341+ && !self . suppress_output
342+ {
343+ eprintln ! ( "Waiting for SSH to be ready..." ) ;
344+ }
345+
346+ std:: thread:: sleep ( Duration :: from_secs ( SSH_POLL_DELAY_SECS ) ) ;
347+ }
348+ Err ( e) => {
349+ return Err ( eyre ! ( "Failed to spawn SSH command: {}" , e) ) ;
350+ }
351+ }
352+ }
353+ }
354+
355+ // For command execution: retry with timeout
356+ loop {
357+ debug ! (
358+ "SSH connection attempt (elapsed: {:.1}s)" ,
359+ start_time. elapsed( ) . as_secs_f64( )
360+ ) ;
288361
289- // Add command if specified - use the same argument escaping logic as container SSH
290- if !self . command . is_empty ( ) {
362+ // Build SSH command
363+ let mut ssh_cmd =
364+ self . build_ssh_command ( ssh_config, & temp_key, parsed_extra_options. clone ( ) ) ;
365+
366+ // Add command
291367 ssh_cmd. arg ( "--" ) ;
292368 if self . command . len ( ) > 1 {
293- // Multiple arguments need proper shell escaping
294369 let combined_command = crate :: ssh:: shell_escape_command ( & self . command )
295370 . map_err ( |e| eyre ! ( "Failed to escape shell command: {}" , e) ) ?;
296- debug ! ( "Combined escaped command: {}" , combined_command) ;
297371 ssh_cmd. arg ( combined_command) ;
298372 } else {
299- // Single argument can be passed directly
300373 ssh_cmd. args ( & self . command ) ;
301374 }
302- }
303-
304- debug ! ( "Executing SSH command: {:?}" , ssh_cmd) ;
305-
306- // For commands (non-interactive SSH), capture output
307- // For interactive SSH (no command), exec to replace current process
308- if self . command . is_empty ( ) {
309- // Interactive SSH - exec to replace the current process
310- // This provides the cleanest terminal experience
311- debug ! ( "Executing interactive SSH session via exec" ) ;
312375
313- let error = ssh_cmd. exec ( ) ;
314- // exec() only returns on error
315- return Err ( eyre ! ( "Failed to exec SSH command: {}" , error) ) ;
316- } else {
317- // Command execution - capture and forward output
376+ // Try SSH
318377 let output = ssh_cmd
319378 . output ( )
320379 . map_err ( |e| eyre ! ( "Failed to execute SSH command: {}" , e) ) ?;
321380
322- if !output. stdout . is_empty ( ) {
323- if !self . suppress_output {
324- // Forward stdout to parent process
381+ if output. status . success ( ) {
382+ if !output. stdout . is_empty ( ) && !self . suppress_output {
325383 print ! ( "{}" , String :: from_utf8_lossy( & output. stdout) ) ;
326384 }
327- debug ! ( "SSH stdout: {}" , String :: from_utf8_lossy( & output. stdout) ) ;
385+ debug ! (
386+ "SSH connected after {:.1}s" ,
387+ start_time. elapsed( ) . as_secs_f64( )
388+ ) ;
389+ return Ok ( ( ) ) ;
328390 }
329- if !output. stderr . is_empty ( ) {
391+
392+ // Check if we've exceeded timeout
393+ if start_time. elapsed ( ) >= timeout {
394+ // Timeout - fail with helpful message
330395 if !self . suppress_output {
331- // Forward stderr to parent process
332- eprint ! ( "{}" , String :: from_utf8_lossy( & output. stderr) ) ;
396+ let stderr_str = String :: from_utf8_lossy ( & output. stderr ) ;
397+ eprint ! ( "{}" , stderr_str) ;
398+ eprintln ! (
399+ "\n SSH connection failed after {:.1}s. To see VM console output, run: virsh console {}" ,
400+ start_time. elapsed( ) . as_secs_f64( ) ,
401+ self . domain_name
402+ ) ;
333403 }
334- debug ! ( "SSH stderr: {}" , String :: from_utf8_lossy( & output. stderr) ) ;
335- }
336-
337- if !output. status . success ( ) {
338404 return Err ( eyre ! (
339- "SSH connection failed with exit code: {} " ,
340- output . status . code ( ) . unwrap_or ( - 1 )
405+ "SSH connection failed after {:.1}s " ,
406+ start_time . elapsed ( ) . as_secs_f64 ( )
341407 ) ) ;
342408 }
343- }
344409
345- Ok ( ( ) )
410+ // Retry all errors
411+ std:: thread:: sleep ( Duration :: from_secs ( SSH_POLL_DELAY_SECS ) ) ;
412+ }
346413 }
347414}
348415
@@ -377,8 +444,8 @@ pub fn run_ssh_impl(
377444 // Extract SSH configuration from domain metadata
378445 let ssh_config = opts. extract_ssh_config ( global_opts) ?;
379446
380- // Connect via SSH
381- opts. connect_ssh ( & ssh_config) ?;
447+ // Connect via SSH with retries
448+ opts. connect_ssh ( global_opts , & ssh_config) ?;
382449
383450 Ok ( ( ) )
384451}
0 commit comments