Skip to content

Commit 5ca4657

Browse files
authored
vmbus_server: fix incorrect assumptions about proxy driver state (#1861)
The `proxyintegration` module made a number of assertions on the state of the vmbusproxy driver. However, because handling server requests and proxy driver requests is done in separate threads, various operations could race with a channel being revoked. If that happens, ioctl calls into the driver might fail with `ERROR_NOT_FOUND` because the channel they were operating on no longer exists. Similarly, the channel may no longer exist in the `channels` or `gpadls` maps. This change fixes those assumptions and changes them to checks, preventing a panic in case such a race occurs.
1 parent 806b6cc commit 5ca4657

File tree

1 file changed

+56
-29
lines changed

1 file changed

+56
-29
lines changed

vm/devices/vmbus/vmbus_server/src/proxyintegration.rs

Lines changed: 56 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ use vmbus_proxy::ProxyAction;
6262
use vmbus_proxy::VmbusProxy;
6363
use vmbus_proxy::vmbusioctl::VMBUS_SERVER_OPEN_CHANNEL_OUTPUT_PARAMETERS;
6464
use vmcore::interrupt::Interrupt;
65+
use windows::Win32::Foundation::ERROR_NOT_FOUND;
6566
use windows::Win32::Foundation::ERROR_OPERATION_ABORTED;
6667
use zerocopy::IntoBytes;
6768

@@ -225,6 +226,31 @@ impl ProxyTask {
225226
recv
226227
}
227228

229+
/// Determines if the ioctl was successful or an expected error.
230+
///
231+
/// The only error this function will actually return is ERROR_NOT_FOUND, which callers can
232+
/// ignore if they do not need to behave differently.
233+
///
234+
/// It panics on all other errors.
235+
fn check_ioctl_result(
236+
result: windows::core::Result<()>,
237+
op: &str,
238+
proxy_id: u64,
239+
) -> windows::core::Result<()> {
240+
result.inspect_err(|err| {
241+
// Due to various operations racing with revoke, calls into the proxy driver may
242+
// fail with ERROR_NOT_FOUND if the channel no longer exists. Other error codes indicate
243+
// the driver and server are out of sync and can likely not be recovered from.
244+
assert!(err.code() == ERROR_NOT_FOUND.into());
245+
tracing::info!(
246+
error = err as &dyn std::error::Error,
247+
op,
248+
proxy_id,
249+
"channel not found during ioctl (possibly revoked)"
250+
);
251+
})
252+
}
253+
228254
async fn handle_open(&self, proxy_id: u64, open_request: &OpenRequest) -> anyhow::Result<()> {
229255
let maybe_wrapped =
230256
MaybeWrappedEvent::new(&TpPool::system(), open_request.interrupt.clone())?;
@@ -243,32 +269,31 @@ impl ProxyTask {
243269
.await
244270
.context("failed to open channel")?;
245271

246-
let recv = self.create_worker_thread(proxy_id);
247-
248272
let mut channels = self.channels.lock();
249-
let channel = channels.get_mut(&proxy_id).unwrap();
273+
let channel = channels
274+
.get_mut(&proxy_id)
275+
.ok_or_else(|| anyhow::anyhow!("channel revoked during open"))?;
276+
277+
let recv = self.create_worker_thread(proxy_id);
250278
channel.worker_result = Some(recv);
251279
channel.wrapped_event = maybe_wrapped.into_wrapped();
252280
Ok(())
253281
}
254282

255283
async fn handle_close(&self, proxy_id: u64) {
256-
self.proxy
257-
.close(proxy_id)
258-
.await
259-
.expect("channel close failed");
284+
let _ = Self::check_ioctl_result(self.proxy.close(proxy_id).await, "close", proxy_id);
260285

261286
// Wait for the worker task.
287+
// N.B. The channel may have been revoked.
262288
let recv = self
263289
.channels
264290
.lock()
265291
.get_mut(&proxy_id)
266-
.unwrap()
267-
.worker_result
268-
.take()
269-
.expect("channel should be open");
292+
.and_then(|channel| channel.worker_result.take());
270293

271-
let _ = recv.await;
294+
if let Some(recv) = recv {
295+
let _ = recv.await;
296+
}
272297
}
273298

274299
async fn handle_gpadl_create(
@@ -278,10 +303,14 @@ impl ProxyTask {
278303
count: u16,
279304
buf: &[u64],
280305
) -> anyhow::Result<()> {
281-
self.proxy
282-
.create_gpadl(proxy_id, gpadl_id.0, count.into(), buf.as_bytes())
283-
.await
284-
.context("failed to create gpadl")?;
306+
Self::check_ioctl_result(
307+
self.proxy
308+
.create_gpadl(proxy_id, gpadl_id.0, count.into(), buf.as_bytes())
309+
.await,
310+
"create_gpadl",
311+
proxy_id,
312+
)
313+
.context("failed to create gpadl")?;
285314

286315
self.gpadls
287316
.lock()
@@ -292,19 +321,17 @@ impl ProxyTask {
292321
}
293322

294323
async fn handle_gpadl_teardown(&self, proxy_id: u64, gpadl_id: GpadlId) {
295-
assert!(
296-
self.gpadls
297-
.lock()
298-
.get_mut(&proxy_id)
299-
.unwrap()
300-
.remove(&gpadl_id),
301-
"gpadl is registered"
302-
);
324+
if let Some(gpadls) = self.gpadls.lock().get_mut(&proxy_id) {
325+
assert!(gpadls.remove(&gpadl_id), "gpadl is registered");
326+
} else {
327+
return;
328+
}
303329

304-
self.proxy
305-
.delete_gpadl(proxy_id, gpadl_id.0)
306-
.await
307-
.expect("delete gpadl failed");
330+
let _ = Self::check_ioctl_result(
331+
self.proxy.delete_gpadl(proxy_id, gpadl_id.0).await,
332+
"delete_gpadl",
333+
proxy_id,
334+
);
308335
}
309336

310337
async fn restore_channel_on_offer(
@@ -376,7 +403,7 @@ impl ProxyTask {
376403
offer: vmbus_proxy::vmbusioctl::VMBUS_CHANNEL_OFFER,
377404
incoming_event: Event,
378405
) -> Option<mesh::Receiver<ChannelRequest>> {
379-
tracing::trace!(?offer, "received vmbusproxy offer");
406+
tracing::trace!(proxy_id, ?offer, "received vmbusproxy offer");
380407
let server = match offer.TargetVtl {
381408
0 => self.server.as_ref(),
382409
2 => {

0 commit comments

Comments
 (0)