@@ -188,6 +188,34 @@ async fn wait_for_vm_state(
188188 ) )
189189}
190190
191+ async fn wait_for_target_state (
192+ stream : & mut tonic:: Streaming < VmEvent > ,
193+ target_state : VmState ,
194+ ) -> Result < ( ) > {
195+ while let Some ( event_res) = stream. next ( ) . await {
196+ let event = event_res?;
197+ let any_data = event. data . expect ( "Event should have data payload" ) ;
198+ if any_data. type_url == "type.googleapis.com/feos.vm.vmm.api.v1.VmStateChangedEvent" {
199+ let state_change = VmStateChangedEvent :: decode ( & * any_data. value ) ?;
200+ let new_state =
201+ VmState :: try_from ( state_change. new_state ) . unwrap_or ( VmState :: Unspecified ) ;
202+
203+ info ! (
204+ "Received VM state change event: new_state={:?}, reason='{}'" ,
205+ new_state, state_change. reason
206+ ) ;
207+
208+ if new_state == target_state {
209+ return Ok ( ( ) ) ;
210+ }
211+ }
212+ }
213+ Err ( anyhow:: anyhow!(
214+ "Event stream ended before VM reached {:?} state." ,
215+ target_state
216+ ) )
217+ }
218+
191219#[ tokio:: test]
192220async fn test_create_and_start_vm ( ) -> Result < ( ) > {
193221 if skip_if_ch_binary_missing ( ) {
@@ -291,6 +319,115 @@ async fn test_create_and_start_vm() -> Result<()> {
291319 Ok ( ( ) )
292320}
293321
322+ #[ tokio:: test]
323+ async fn test_vm_healthcheck_and_crash_recovery ( ) -> Result < ( ) > {
324+ if skip_if_ch_binary_missing ( ) {
325+ return Ok ( ( ) ) ;
326+ }
327+
328+ ensure_server ( ) . await ;
329+ let ( mut vm_client, _) = get_public_clients ( ) . await ?;
330+
331+ let image_ref = TEST_IMAGE_REF . clone ( ) ;
332+ let vm_config = VmConfig {
333+ cpus : Some ( CpuConfig {
334+ boot_vcpus : 1 ,
335+ max_vcpus : 1 ,
336+ } ) ,
337+ memory : Some ( MemoryConfig { size_mib : 1024 } ) ,
338+ image_ref,
339+ disks : vec ! [ ] ,
340+ net : vec ! [ ] ,
341+ ignition : None ,
342+ } ;
343+ let create_req = CreateVmRequest {
344+ config : Some ( vm_config) ,
345+ vm_id : None ,
346+ } ;
347+
348+ info ! ( "Sending CreateVm request for healthcheck test" ) ;
349+ let create_res = vm_client. create_vm ( create_req) . await ?. into_inner ( ) ;
350+ let vm_id = create_res. vm_id ;
351+ info ! ( "VM created with ID: {}" , vm_id) ;
352+
353+ let mut guard = VmGuard {
354+ vm_id : vm_id. clone ( ) ,
355+ pid : None ,
356+ cleanup_disabled : false ,
357+ } ;
358+
359+ info ! ( "Connecting to StreamVmEvents stream for vm_id: {}" , & vm_id) ;
360+ let events_req = StreamVmEventsRequest {
361+ vm_id : Some ( vm_id. clone ( ) ) ,
362+ ..Default :: default ( )
363+ } ;
364+ let mut stream = vm_client. stream_vm_events ( events_req) . await ?. into_inner ( ) ;
365+
366+ timeout (
367+ Duration :: from_secs ( 180 ) ,
368+ wait_for_vm_state ( & mut stream, VmState :: Created ) ,
369+ )
370+ . await
371+ . expect ( "Timed out waiting for VM to become created" ) ?;
372+ info ! ( "VM is in CREATED state" ) ;
373+
374+ info ! ( "Sending StartVm request for vm_id: {}" , & vm_id) ;
375+ let start_req = StartVmRequest {
376+ vm_id : vm_id. clone ( ) ,
377+ } ;
378+ vm_client. start_vm ( start_req) . await ?;
379+
380+ timeout (
381+ Duration :: from_secs ( 30 ) ,
382+ wait_for_vm_state ( & mut stream, VmState :: Running ) ,
383+ )
384+ . await
385+ . expect ( "Timed out waiting for VM to become running" ) ?;
386+ info ! ( "VM is in RUNNING state" ) ;
387+
388+ info ! ( "Pinging VMM for vm_id: {}" , & vm_id) ;
389+ let ping_req = PingVmRequest {
390+ vm_id : vm_id. clone ( ) ,
391+ } ;
392+ let ping_res = vm_client. ping_vm ( ping_req) . await ?. into_inner ( ) ;
393+ info ! ( "VMM Ping successful, PID: {}" , ping_res. pid) ;
394+ let pid_to_kill = Pid :: from_raw ( ping_res. pid as i32 ) ;
395+ guard. pid = Some ( pid_to_kill) ;
396+
397+ info ! (
398+ "Forcefully killing hypervisor process with PID: {}" ,
399+ pid_to_kill
400+ ) ;
401+ kill ( pid_to_kill, Signal :: SIGKILL ) . context ( "Failed to kill hypervisor process" ) ?;
402+ info ! ( "Successfully sent SIGKILL to process {}" , pid_to_kill) ;
403+
404+ timeout (
405+ Duration :: from_secs ( 30 ) ,
406+ wait_for_target_state ( & mut stream, VmState :: Crashed ) ,
407+ )
408+ . await
409+ . expect ( "Timed out waiting for VM to enter Crashed state" ) ?;
410+ info ! ( "VM is in CRASHED state as expected" ) ;
411+
412+ info ! ( "Deleting crashed VM: {}" , & vm_id) ;
413+ let delete_req = DeleteVmRequest {
414+ vm_id : vm_id. clone ( ) ,
415+ } ;
416+ vm_client. delete_vm ( delete_req) . await ?. into_inner ( ) ;
417+ info ! ( "DeleteVm call successful for crashed VM" ) ;
418+
419+ let socket_path = format ! ( "{}/{}" , VM_API_SOCKET_DIR , & vm_id) ;
420+ assert ! (
421+ !Path :: new( & socket_path) . exists( ) ,
422+ "Socket file '{}' should not exist after DeleteVm" ,
423+ socket_path
424+ ) ;
425+ info ! ( "Verified VM API socket is deleted: {}" , socket_path) ;
426+
427+ guard. cleanup_disabled = true ;
428+ Ok ( ( ) )
429+ }
430+
294431#[ tokio:: test]
295432async fn test_hostname_retrieval ( ) -> Result < ( ) > {
296433 ensure_server ( ) . await ;
@@ -319,7 +456,6 @@ async fn test_get_memory_info() -> Result<()> {
319456 ensure_server ( ) . await ;
320457 let ( _, mut host_client) = get_public_clients ( ) . await ?;
321458
322- // 1. Read local /proc/meminfo to get the ground truth
323459 let file = File :: open ( "/proc/meminfo" ) ?;
324460 let reader = BufReader :: new ( file) ;
325461 let mut local_memtotal = 0 ;
@@ -340,11 +476,9 @@ async fn test_get_memory_info() -> Result<()> {
340476 ) ;
341477 info ! ( "Local MemTotal from /proc/meminfo: {} kB" , local_memtotal) ;
342478
343- // 2. Make the gRPC call
344479 info ! ( "Sending GetMemory request" ) ;
345480 let response = host_client. get_memory ( MemoryRequest { } ) . await ?. into_inner ( ) ;
346481
347- // 3. Validate the response
348482 let mem_info = response
349483 . mem_info
350484 . context ( "MemoryInfo was not present in the response" ) ?;
0 commit comments