
Commit f6f21b4

ShadowCurse authored and bchalios committed
fix(vsock): save state after sending a notification
This is a fix for the fix introduced in #4796. The original issue was the vsock device hanging after snapshot restoration because the guest was not notified about the termination packet. But there was a bug in that fix: namely, we saved the vsock state before the notification was sent, thus discarding all modifications made to send the notification. The reason the original fix worked is that we were only testing with 1 iteration of snapshot/restore; even though we lost synchronization with the guest on the event queue state, it worked fine once. Doing more iterations, however, causes vsock to hang as before.

This commit fixes the issue by storing the vsock state after the notification is sent, and modifies the vsock test to run multiple iterations of snapshot/restore.

Signed-off-by: Egor Lazarchuk <[email protected]>
1 parent 121fab6 commit f6f21b4
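
To make the ordering bug described above concrete: the notification sent while snapshotting (the transport reset) mutates the device's queue state, so a snapshot taken before the notification no longer matches what the guest has observed. The sketch below is a minimal, self-contained Rust model of that ordering problem; `Device`, `QueueState`, `save`, and `notify_guest` are hypothetical names chosen for illustration, not Firecracker's actual types or API.

// Minimal model of the bug: hypothetical types, not Firecracker's real API.
// Sending a notification advances queue state, so the snapshot must be taken
// afterwards to stay in sync with what the guest has already seen.

#[derive(Debug, PartialEq)]
struct QueueState {
    next_used: u16,
}

struct Device {
    next_used: u16,
}

impl Device {
    fn save(&self) -> QueueState {
        // Snapshot of the queue-related state.
        QueueState { next_used: self.next_used }
    }

    fn notify_guest(&mut self) {
        // Delivering a notification to the guest modifies the very state the
        // snapshot is supposed to capture.
        self.next_used = self.next_used.wrapping_add(1);
    }
}

fn main() {
    // Buggy order (old code): the saved state misses the change caused by the
    // notification, so a restored VM disagrees with the guest about the queue.
    let mut dev = Device { next_used: 7 };
    let stale = dev.save();
    dev.notify_guest();
    assert_ne!(stale.next_used, dev.next_used);

    // Fixed order (this commit): notify first, then save.
    let mut dev = Device { next_used: 7 };
    dev.notify_guest();
    let fresh = dev.save();
    assert_eq!(fresh.next_used, dev.next_used);
}

The actual change in src/vmm/src/device_manager/persist.rs below is exactly this reordering: the `VsockState` is now built only after `send_transport_reset_event()` has had a chance to run.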

File tree

src/vmm/src/device_manager/persist.rs
tests/integration_tests/functional/test_vsock.py

2 files changed: +62 −53 lines changed


src/vmm/src/device_manager/persist.rs

Lines changed: 7 additions & 5 deletions
@@ -365,11 +365,6 @@ impl<'a> Persist<'a> for MMIODeviceManager {
                     .downcast_mut::<Vsock<VsockUnixBackend>>()
                     .unwrap();

-                let vsock_state = VsockState {
-                    backend: vsock.backend().save(),
-                    frontend: vsock.save(),
-                };
-
                 // Send Transport event to reset connections if device
                 // is activated.
                 if vsock.is_activated() {
@@ -378,6 +373,13 @@ impl<'a> Persist<'a> for MMIODeviceManager {
                     });
                 }

+                // Save state after potential notification to the guest. This
+                // way we save changes to the queue the notification can cause.
+                let vsock_state = VsockState {
+                    backend: vsock.backend().save(),
+                    frontend: vsock.save(),
+                };
+
                 states.vsock_device = Some(ConnectedVsockState {
                     device_id: devid.clone(),
                     device_state: vsock_state,

tests/integration_tests/functional/test_vsock.py

Lines changed: 55 additions & 48 deletions
@@ -227,54 +227,61 @@ def test_vsock_transport_reset_g2h(uvm_nano, microvm_factory):
     test_vm.api.vsock.put(vsock_id="vsock0", guest_cid=3, uds_path=f"/{VSOCK_UDS_PATH}")
     test_vm.start()

-    host_socket_path = os.path.join(
-        test_vm.path, f"{VSOCK_UDS_PATH}_{ECHO_SERVER_PORT}"
-    )
-    host_socat_commmand = [
-        "socat",
-        "-dddd",
-        f"UNIX-LISTEN:{host_socket_path},fork",
-        "STDOUT",
-    ]
-    host_socat = subprocess.Popen(
-        host_socat_commmand, stdout=subprocess.PIPE, stderr=subprocess.PIPE
-    )
-
-    # Give some time for host socat to create socket
-    time.sleep(0.5)
-    assert Path(host_socket_path).exists()
-    test_vm.create_jailed_resource(host_socket_path)
-
-    # Create a socat process in the guest which will connect to the host socat
-    guest_socat_commmand = f"tmux new -d 'socat - vsock-connect:2:{ECHO_SERVER_PORT}'"
-    test_vm.ssh.run(guest_socat_commmand)
-
-    # socat should be running in the guest now
-    code, _, _ = test_vm.ssh.run("pidof socat")
-    assert code == 0
-
-    # Create snapshot.
+    # Create snapshot and terminate a VM.
     snapshot = test_vm.snapshot_full()
-    test_vm.resume()
-
-    # After `create_snapshot` + 'restore' calls, connection should be dropped
-    code, _, _ = test_vm.ssh.run("pidof socat")
-    assert code == 1
-
-    # Kill host socat as it is not useful anymore
-    host_socat.kill()
-    host_socat.communicate()
-
-    # Terminate VM.
     test_vm.kill()

-    # Load snapshot.
-    vm2 = microvm_factory.build()
-    vm2.spawn()
-    vm2.restore_from_snapshot(snapshot, resume=True)
-
-    # After snap restore all vsock connections should be
-    # dropped. This means guest socat should exit same way
-    # as it did after snapshot was taken.
-    code, _, _ = vm2.ssh.run("pidof socat")
-    assert code == 1
+    for _ in range(5):
+        # Load snapshot.
+        new_vm = microvm_factory.build()
+        new_vm.spawn()
+        new_vm.restore_from_snapshot(snapshot, resume=True)
+
+        # After snap restore all vsock connections should be
+        # dropped. This means guest socat should exit same way
+        # as it did after snapshot was taken.
+        code, _, _ = new_vm.ssh.run("pidof socat")
+        assert code == 1
+
+        host_socket_path = os.path.join(
+            new_vm.path, f"{VSOCK_UDS_PATH}_{ECHO_SERVER_PORT}"
+        )
+        host_socat_commmand = [
+            "socat",
+            "-dddd",
+            f"UNIX-LISTEN:{host_socket_path},fork",
+            "STDOUT",
+        ]
+        host_socat = subprocess.Popen(
+            host_socat_commmand, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+        )
+
+        # Give some time for host socat to create socket
+        time.sleep(0.5)
+        assert Path(host_socket_path).exists()
+        new_vm.create_jailed_resource(host_socket_path)
+
+        # Create a socat process in the guest which will connect to the host socat
+        guest_socat_commmand = (
+            f"tmux new -d 'socat - vsock-connect:2:{ECHO_SERVER_PORT}'"
+        )
+        new_vm.ssh.run(guest_socat_commmand)
+
+        # socat should be running in the guest now
+        code, _, _ = new_vm.ssh.run("pidof socat")
+        assert code == 0
+
+        # Create snapshot.
+        snapshot = new_vm.snapshot_full()
+        new_vm.resume()
+
+        # After `create_snapshot` + 'restore' calls, connection should be dropped
+        code, _, _ = new_vm.ssh.run("pidof socat")
+        assert code == 1
+
+        # Kill host socat as it is not useful anymore
+        host_socat.kill()
+        host_socat.communicate()
+
+        # Terminate VM.
+        new_vm.kill()
