Skip to content

Commit eb47449

Browse files
committed
invalidate the device when we encounter driver-induced device loss or on unexpected errors
1 parent ce6a46e commit eb47449

File tree

14 files changed: 176 additions (+176) and 186 deletions (-186).

14 files changed: 176 additions (+176) and 186 deletions (-186).

wgpu-core/src/command/clear.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -163,7 +163,7 @@ impl Global {
163163

164164
// actual hal barrier & operation
165165
let dst_barrier = dst_pending.map(|pending| pending.into_hal(&dst_buffer, &snatch_guard));
166-
let cmd_buf_raw = cmd_buf_data.encoder.open()?;
166+
let cmd_buf_raw = cmd_buf_data.encoder.open(&cmd_buf.device)?;
167167
unsafe {
168168
cmd_buf_raw.transition_buffers(dst_barrier.as_slice());
169169
cmd_buf_raw.clear_buffer(dst_raw, offset..end_offset);
@@ -249,7 +249,7 @@ impl Global {
249249

250250
let device = &cmd_buf.device;
251251
device.check_is_valid()?;
252-
let (encoder, tracker) = cmd_buf_data.open_encoder_and_tracker()?;
252+
let (encoder, tracker) = cmd_buf_data.open_encoder_and_tracker(&cmd_buf.device)?;
253253

254254
let snatch_guard = device.snatchable_lock.read();
255255
clear_texture(

wgpu-core/src/command/compute.rs

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -433,10 +433,10 @@ impl Global {
433433
// We automatically keep extending command buffers over time, and because
434434
// we want to insert a command buffer _before_ what we're about to record,
435435
// we need to make sure to close the previous one.
436-
encoder.close().map_pass_err(pass_scope)?;
436+
encoder.close(&cmd_buf.device).map_pass_err(pass_scope)?;
437437
// will be reset to true if recording is done without errors
438438
*status = CommandEncoderStatus::Error;
439-
let raw_encoder = encoder.open().map_pass_err(pass_scope)?;
439+
let raw_encoder = encoder.open(&cmd_buf.device).map_pass_err(pass_scope)?;
440440

441441
let mut state = State {
442442
binder: Binder::new(),
@@ -617,12 +617,12 @@ impl Global {
617617
} = state;
618618

619619
// Stop the current command buffer.
620-
encoder.close().map_pass_err(pass_scope)?;
620+
encoder.close(&cmd_buf.device).map_pass_err(pass_scope)?;
621621

622622
// Create a new command buffer, which we will insert _before_ the body of the compute pass.
623623
//
624624
// Use that buffer to insert barriers and clear discarded images.
625-
let transit = encoder.open().map_pass_err(pass_scope)?;
625+
let transit = encoder.open(&cmd_buf.device).map_pass_err(pass_scope)?;
626626
fixup_discarded_surfaces(
627627
pending_discard_init_fixups.into_iter(),
628628
transit,
@@ -637,7 +637,9 @@ impl Global {
637637
&snatch_guard,
638638
);
639639
// Close the command buffer, and swap it with the previous.
640-
encoder.close_and_swap().map_pass_err(pass_scope)?;
640+
encoder
641+
.close_and_swap(&cmd_buf.device)
642+
.map_pass_err(pass_scope)?;
641643

642644
Ok(())
643645
}

wgpu-core/src/command/mod.rs

Lines changed: 19 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -172,10 +172,10 @@ impl CommandEncoder {
172172
/// [l]: CommandEncoder::list
173173
/// [`transition_buffers`]: hal::CommandEncoder::transition_buffers
174174
/// [`transition_textures`]: hal::CommandEncoder::transition_textures
175-
fn close_and_swap(&mut self) -> Result<(), DeviceError> {
175+
fn close_and_swap(&mut self, device: &Device) -> Result<(), DeviceError> {
176176
if self.is_open {
177177
self.is_open = false;
178-
let new = unsafe { self.raw.end_encoding()? };
178+
let new = unsafe { self.raw.end_encoding() }.map_err(|e| device.handle_hal_error(e))?;
179179
self.list.insert(self.list.len() - 1, new);
180180
}
181181

@@ -192,10 +192,11 @@ impl CommandEncoder {
192192
/// On return, the underlying hal encoder is closed.
193193
///
194194
/// [l]: CommandEncoder::list
195-
fn close(&mut self) -> Result<(), DeviceError> {
195+
fn close(&mut self, device: &Device) -> Result<(), DeviceError> {
196196
if self.is_open {
197197
self.is_open = false;
198-
let cmd_buf = unsafe { self.raw.end_encoding()? };
198+
let cmd_buf =
199+
unsafe { self.raw.end_encoding() }.map_err(|e| device.handle_hal_error(e))?;
199200
self.list.push(cmd_buf);
200201
}
201202

@@ -215,11 +216,15 @@ impl CommandEncoder {
215216
/// Begin recording a new command buffer, if we haven't already.
216217
///
217218
/// The underlying hal encoder is put in the "recording" state.
218-
pub(crate) fn open(&mut self) -> Result<&mut dyn hal::DynCommandEncoder, DeviceError> {
219+
pub(crate) fn open(
220+
&mut self,
221+
device: &Device,
222+
) -> Result<&mut dyn hal::DynCommandEncoder, DeviceError> {
219223
if !self.is_open {
220224
self.is_open = true;
221225
let hal_label = self.hal_label.as_deref();
222-
unsafe { self.raw.begin_encoding(hal_label)? };
226+
unsafe { self.raw.begin_encoding(hal_label) }
227+
.map_err(|e| device.handle_hal_error(e))?;
223228
}
224229

225230
Ok(self.raw.as_mut())
@@ -229,9 +234,9 @@ impl CommandEncoder {
229234
/// its own label.
230235
///
231236
/// The underlying hal encoder is put in the "recording" state.
232-
fn open_pass(&mut self, hal_label: Option<&str>) -> Result<(), DeviceError> {
237+
fn open_pass(&mut self, hal_label: Option<&str>, device: &Device) -> Result<(), DeviceError> {
233238
self.is_open = true;
234-
unsafe { self.raw.begin_encoding(hal_label)? };
239+
unsafe { self.raw.begin_encoding(hal_label) }.map_err(|e| device.handle_hal_error(e))?;
235240

236241
Ok(())
237242
}
@@ -276,8 +281,9 @@ pub struct CommandBufferMutable {
276281
impl CommandBufferMutable {
277282
pub(crate) fn open_encoder_and_tracker(
278283
&mut self,
284+
device: &Device,
279285
) -> Result<(&mut dyn hal::DynCommandEncoder, &mut Tracker), DeviceError> {
280-
let encoder = self.encoder.open()?;
286+
let encoder = self.encoder.open(device)?;
281287
let tracker = &mut self.trackers;
282288

283289
Ok((encoder, tracker))
@@ -621,7 +627,7 @@ impl Global {
621627
let cmd_buf_data = cmd_buf_data.as_mut().unwrap();
622628
match cmd_buf_data.status {
623629
CommandEncoderStatus::Recording => {
624-
if let Err(e) = cmd_buf_data.encoder.close() {
630+
if let Err(e) = cmd_buf_data.encoder.close(&cmd_buf.device) {
625631
Some(e.into())
626632
} else {
627633
cmd_buf_data.status = CommandEncoderStatus::Finished;
@@ -671,7 +677,7 @@ impl Global {
671677
list.push(TraceCommand::PushDebugGroup(label.to_string()));
672678
}
673679

674-
let cmd_buf_raw = cmd_buf_data.encoder.open()?;
680+
let cmd_buf_raw = cmd_buf_data.encoder.open(&cmd_buf.device)?;
675681
if !self
676682
.instance
677683
.flags
@@ -713,7 +719,7 @@ impl Global {
713719
.flags
714720
.contains(wgt::InstanceFlags::DISCARD_HAL_LABELS)
715721
{
716-
let cmd_buf_raw = cmd_buf_data.encoder.open()?;
722+
let cmd_buf_raw = cmd_buf_data.encoder.open(&cmd_buf.device)?;
717723
unsafe {
718724
cmd_buf_raw.insert_debug_marker(label);
719725
}
@@ -744,7 +750,7 @@ impl Global {
744750
list.push(TraceCommand::PopDebugGroup);
745751
}
746752

747-
let cmd_buf_raw = cmd_buf_data.encoder.open()?;
753+
let cmd_buf_raw = cmd_buf_data.encoder.open(&cmd_buf.device)?;
748754
if !self
749755
.instance
750756
.flags

wgpu-core/src/command/query.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -346,7 +346,7 @@ impl Global {
346346
let encoder = &mut cmd_buf_data.encoder;
347347
let tracker = &mut cmd_buf_data.trackers;
348348

349-
let raw_encoder = encoder.open()?;
349+
let raw_encoder = encoder.open(&cmd_buf.device)?;
350350

351351
let query_set = hub
352352
.query_sets
@@ -397,7 +397,7 @@ impl Global {
397397
let encoder = &mut cmd_buf_data.encoder;
398398
let tracker = &mut cmd_buf_data.trackers;
399399
let buffer_memory_init_actions = &mut cmd_buf_data.buffer_memory_init_actions;
400-
let raw_encoder = encoder.open()?;
400+
let raw_encoder = encoder.open(&cmd_buf.device)?;
401401

402402
if destination_offset % wgt::QUERY_RESOLVE_BUFFER_ALIGNMENT != 0 {
403403
return Err(QueryError::Resolve(ResolveError::BufferOffsetAlignment));

wgpu-core/src/command/render.rs

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1588,10 +1588,12 @@ impl Global {
15881588
// We automatically keep extending command buffers over time, and because
15891589
// we want to insert a command buffer _before_ what we're about to record,
15901590
// we need to make sure to close the previous one.
1591-
encoder.close().map_pass_err(pass_scope)?;
1591+
encoder.close(&cmd_buf.device).map_pass_err(pass_scope)?;
15921592
// We will reset this to `Recording` if we succeed, acts as a fail-safe.
15931593
*status = CommandEncoderStatus::Error;
1594-
encoder.open_pass(hal_label).map_pass_err(pass_scope)?;
1594+
encoder
1595+
.open_pass(hal_label, &cmd_buf.device)
1596+
.map_pass_err(pass_scope)?;
15951597

15961598
let info = RenderPassInfo::start(
15971599
device,
@@ -1894,7 +1896,7 @@ impl Global {
18941896
.finish(state.raw_encoder, state.snatch_guard)
18951897
.map_pass_err(pass_scope)?;
18961898

1897-
encoder.close().map_pass_err(pass_scope)?;
1899+
encoder.close(&cmd_buf.device).map_pass_err(pass_scope)?;
18981900
(trackers, pending_discard_init_fixups)
18991901
};
19001902

@@ -1906,7 +1908,7 @@ impl Global {
19061908
let tracker = &mut cmd_buf_data.trackers;
19071909

19081910
{
1909-
let transit = encoder.open().map_pass_err(pass_scope)?;
1911+
let transit = encoder.open(&cmd_buf.device).map_pass_err(pass_scope)?;
19101912

19111913
fixup_discarded_surfaces(
19121914
pending_discard_init_fixups.into_iter(),
@@ -1922,7 +1924,9 @@ impl Global {
19221924
}
19231925

19241926
*status = CommandEncoderStatus::Recording;
1925-
encoder.close_and_swap().map_pass_err(pass_scope)?;
1927+
encoder
1928+
.close_and_swap(&cmd_buf.device)
1929+
.map_pass_err(pass_scope)?;
19261930

19271931
Ok(())
19281932
}

wgpu-core/src/command/transfer.rs

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -432,7 +432,7 @@ fn handle_texture_init(
432432

433433
// In rare cases we may need to insert an init operation immediately onto the command buffer.
434434
if !immediate_inits.is_empty() {
435-
let cmd_buf_raw = encoder.open()?;
435+
let cmd_buf_raw = encoder.open(device)?;
436436
for init in immediate_inits {
437437
clear_texture(
438438
&init.texture,
@@ -684,7 +684,7 @@ impl Global {
684684
dst_offset: destination_offset,
685685
size: wgt::BufferSize::new(size).unwrap(),
686686
};
687-
let cmd_buf_raw = cmd_buf_data.encoder.open()?;
687+
let cmd_buf_raw = cmd_buf_data.encoder.open(&cmd_buf.device)?;
688688
let barriers = src_barrier
689689
.into_iter()
690690
.chain(dst_barrier)
@@ -855,7 +855,7 @@ impl Global {
855855
})
856856
.collect::<Vec<_>>();
857857

858-
let cmd_buf_raw = encoder.open()?;
858+
let cmd_buf_raw = encoder.open(&cmd_buf.device)?;
859859
unsafe {
860860
cmd_buf_raw.transition_textures(&dst_barrier);
861861
cmd_buf_raw.transition_buffers(src_barrier.as_slice());
@@ -1030,7 +1030,7 @@ impl Global {
10301030
}
10311031
})
10321032
.collect::<Vec<_>>();
1033-
let cmd_buf_raw = encoder.open()?;
1033+
let cmd_buf_raw = encoder.open(&cmd_buf.device)?;
10341034
unsafe {
10351035
cmd_buf_raw.transition_buffers(dst_barrier.as_slice());
10361036
cmd_buf_raw.transition_textures(&src_barrier);
@@ -1209,7 +1209,7 @@ impl Global {
12091209
}
12101210
})
12111211
.collect::<Vec<_>>();
1212-
let cmd_buf_raw = cmd_buf_data.encoder.open()?;
1212+
let cmd_buf_raw = cmd_buf_data.encoder.open(&cmd_buf.device)?;
12131213
unsafe {
12141214
cmd_buf_raw.transition_textures(&barriers);
12151215
cmd_buf_raw.copy_texture_to_texture(

wgpu-core/src/device/global.rs

Lines changed: 18 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -270,21 +270,27 @@ impl Global {
270270

271271
let snatch_guard = device.snatchable_lock.read();
272272
let raw_buf = buffer.try_raw(&snatch_guard)?;
273-
unsafe {
274-
let mapping = device
273+
274+
let mapping = unsafe {
275+
device
275276
.raw()
276277
.map_buffer(raw_buf, offset..offset + data.len() as u64)
277-
.map_err(DeviceError::from)?;
278-
std::ptr::copy_nonoverlapping(data.as_ptr(), mapping.ptr.as_ptr(), data.len());
279-
if !mapping.is_coherent {
280-
#[allow(clippy::single_range_in_vec_init)]
278+
}
279+
.map_err(|e| device.handle_hal_error(e))?;
280+
281+
unsafe { std::ptr::copy_nonoverlapping(data.as_ptr(), mapping.ptr.as_ptr(), data.len()) };
282+
283+
if !mapping.is_coherent {
284+
#[allow(clippy::single_range_in_vec_init)]
285+
unsafe {
281286
device
282287
.raw()
283-
.flush_mapped_ranges(raw_buf, &[offset..offset + data.len() as u64]);
284-
}
285-
device.raw().unmap_buffer(raw_buf);
288+
.flush_mapped_ranges(raw_buf, &[offset..offset + data.len() as u64])
289+
};
286290
}
287291

292+
unsafe { device.raw().unmap_buffer(raw_buf) };
293+
288294
Ok(())
289295
}
290296

@@ -2006,7 +2012,9 @@ impl Global {
20062012
hal::SurfaceError::Outdated | hal::SurfaceError::Lost => {
20072013
E::InvalidSurface
20082014
}
2009-
hal::SurfaceError::Device(error) => E::Device(error.into()),
2015+
hal::SurfaceError::Device(error) => {
2016+
E::Device(device.handle_hal_error(error))
2017+
}
20102018
hal::SurfaceError::Other(message) => {
20112019
log::error!("surface configuration failed: {}", message);
20122020
E::InvalidSurface
@@ -2289,16 +2297,6 @@ impl Global {
22892297
}
22902298
}
22912299

2292-
pub fn device_mark_lost(&self, device_id: DeviceId, message: &str) {
2293-
api_log!("Device::mark_lost {device_id:?}");
2294-
2295-
let hub = &self.hub;
2296-
2297-
if let Ok(device) = hub.devices.get(device_id) {
2298-
device.lose(message);
2299-
}
2300-
}
2301-
23022300
pub fn device_get_internal_counters(&self, device_id: DeviceId) -> wgt::InternalCounters {
23032301
let hub = &self.hub;
23042302
if let Ok(device) = hub.devices.get(device_id) {

wgpu-core/src/device/mod.rs

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -308,7 +308,7 @@ fn map_buffer(
308308
let raw_buffer = buffer.try_raw(snatch_guard)?;
309309
let mapping = unsafe {
310310
raw.map_buffer(raw_buffer, offset..offset + size)
311-
.map_err(DeviceError::from)?
311+
.map_err(|e| buffer.device.handle_hal_error(e))?
312312
};
313313

314314
if !mapping.is_coherent && kind == HostMap::Read {
@@ -420,13 +420,16 @@ pub enum DeviceError {
420420
DeviceMismatch(#[from] Box<DeviceMismatch>),
421421
}
422422

423-
impl From<hal::DeviceError> for DeviceError {
424-
fn from(error: hal::DeviceError) -> Self {
423+
impl DeviceError {
424+
/// Only use this function in contexts where there is no `Device`.
425+
///
426+
/// Use [`Device::handle_hal_error`] otherwise.
427+
pub fn from_hal(error: hal::DeviceError) -> Self {
425428
match error {
426-
hal::DeviceError::Lost => DeviceError::Lost,
427-
hal::DeviceError::OutOfMemory => DeviceError::OutOfMemory,
428-
hal::DeviceError::ResourceCreationFailed => DeviceError::ResourceCreationFailed,
429-
hal::DeviceError::Unexpected => DeviceError::Lost,
429+
hal::DeviceError::Lost => Self::Lost,
430+
hal::DeviceError::OutOfMemory => Self::OutOfMemory,
431+
hal::DeviceError::ResourceCreationFailed => Self::ResourceCreationFailed,
432+
hal::DeviceError::Unexpected => Self::Lost,
430433
}
431434
}
432435
}

0 commit comments

Comments
 (0)