Skip to content

Commit bb8571f

Browse files
committed
Fix buffer initialization tracking for some buffer-texture copies
Fixes #7947 Fixes #8021 Fixes #8097
1 parent 309ffdc commit bb8571f

File tree

4 files changed

+132
-39
lines changed

4 files changed

+132
-39
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ By @cwfitzgerald in [#8162](https://github.com/gfx-rs/wgpu/pull/8162).
128128
- The limits requested for a device must now satisfy `min_subgroup_size <= max_subgroup_size`. By @andyleiserson in [#8085](https://github.com/gfx-rs/wgpu/pull/8085).
129129
- Require new `F16_IN_F32` downlevel flag for `quantizeToF16`, `pack2x16float`, and `unpack2x16float` in WGSL input. By @aleiserson in [#8130](https://github.com/gfx-rs/wgpu/pull/8130).
130130
- The error message for non-copyable depth/stencil formats no longer mentions the aspect when it is not relevant. By @reima in [#8156](https://github.com/gfx-rs/wgpu/pull/8156).
131+
- Track the initialization status of buffer memory correctly when `copy_texture_to_buffer` skips over padding space between rows or layers, or when the start/end of a texture-buffer transfer is not 4B aligned. By @andyleiserson in [#8099](https://github.com/gfx-rs/wgpu/pull/8099).
131132

132133
#### naga
133134

cts_runner/test.lst

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,9 @@ webgpu:api,operation,compute,basic:memcpy:*
1313
//FAIL: webgpu:api,operation,compute,basic:large_dispatch:*
1414
webgpu:api,operation,compute_pipeline,overrides:*
1515
webgpu:api,operation,device,lost:*
16-
// This is all the storeOp tests, but some of them fail if run in a single invocation.
17-
// https://github.com/gfx-rs/wgpu/issues/8021
18-
webgpu:api,operation,render_pass,storeOp:render_pass_store_op,color_attachment_with_depth_stencil_attachment:*
19-
webgpu:api,operation,render_pass,storeOp:render_pass_store_op,color_attachment_only:*
20-
webgpu:api,operation,render_pass,storeOp:render_pass_store_op,multiple_color_attachments:*
21-
webgpu:api,operation,render_pass,storeOp:render_pass_store_op,depth_stencil_attachment_only:*
16+
webgpu:api,operation,render_pass,storeOp:*
17+
// Presumably vertex pulling, revisit after https://github.com/gfx-rs/wgpu/issues/7981 is fixed.
18+
fails-if(metal) webgpu:api,operation,vertex_state,correctness:setVertexBuffer_offset_and_attribute_offset:*
2219
fails-if(dx12) webgpu:api,validation,capability_checks,limits,maxBindGroups:setBindGroup,*
2320
webgpu:api,validation,createBindGroup:buffer,effective_buffer_binding_size:*
2421
webgpu:api,validation,encoding,beginComputePass:*
@@ -74,14 +71,18 @@ webgpu:shader,validation,extension,dual_source_blending:blend_src_syntax_validat
7471
webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_usage_and_aspect:*
7572
webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_buffer_size:*
7673
// image_copy depth/stencil failures on dx12: https://github.com/gfx-rs/wgpu/issues/8133
74+
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_buffer_offset:format="stencil8";aspect="stencil-only";copyType="CopyB2T"
75+
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_buffer_offset:format="stencil8";aspect="stencil-only";copyType="CopyT2B"
7776
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_buffer_offset:format="depth32float";aspect="depth-only";copyType="CopyT2B"
7877
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_buffer_offset:format="depth24plus-stencil8";aspect="stencil-only";copyType="CopyB2T"
7978
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_buffer_offset:format="depth24plus-stencil8";aspect="stencil-only";copyType="CopyT2B"
8079
//mix of PASS and FAIL: other subtests of copy_buffer_offset. Related bugs:
81-
// https://github.com/gfx-rs/wgpu/issues/7946, https://github.com/gfx-rs/wgpu/issues/7947
80+
// https://github.com/gfx-rs/wgpu/issues/7946
8281
webgpu:api,validation,image_copy,buffer_texture_copies:sample_count:*
8382
webgpu:api,validation,image_copy,buffer_texture_copies:texture_buffer_usages:*
8483
webgpu:api,validation,image_copy,buffer_texture_copies:device_mismatch:*
84+
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="r8uint";copyType="CopyB2T";dimension="1d"
85+
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="r8uint";copyType="CopyT2B";dimension="1d"
8586
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="rgba32float";copyType="CopyB2T";dimension="1d"
8687
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="rgba32float";copyType="CopyT2B";dimension="1d"
8788
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="rgba8unorm";copyType="CopyB2T";dimension="2d"
@@ -95,7 +96,7 @@ fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and
9596
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="astc-4x4-unorm";copyType="CopyT2B";dimension="3d"
9697
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="astc-4x4-unorm";copyType="CopyB2T";dimension="3d"
9798
//mix of PASS and FAIL: other subtests of offset_and_bytesPerRow. Related bugs:
98-
// https://github.com/gfx-rs/wgpu/issues/7946, https://github.com/gfx-rs/wgpu/issues/7947
99+
// https://github.com/gfx-rs/wgpu/issues/7946
99100
webgpu:api,validation,image_copy,layout_related:copy_end_overflows_u64:*
100101
// Fails with OOM in CI.
101102
fails-if(dx12) webgpu:api,validation,image_copy,layout_related:offset_alignment:*

wgpu-core/src/command/transfer.rs

Lines changed: 112 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use crate::{
2121
TextureInitTrackerAction,
2222
},
2323
resource::{
24-
MissingBufferUsageError, MissingTextureUsageError, ParentDevice, RawResourceAccess,
24+
Buffer, MissingBufferUsageError, MissingTextureUsageError, ParentDevice, RawResourceAccess,
2525
Texture, TextureErrorDimension,
2626
},
2727
snatch::SnatchGuard,
@@ -33,7 +33,7 @@ pub type TexelCopyBufferInfo = wgt::TexelCopyBufferInfo<BufferId>;
3333
pub type TexelCopyTextureInfo = wgt::TexelCopyTextureInfo<TextureId>;
3434
pub type CopyExternalImageDestInfo = wgt::CopyExternalImageDestInfo<TextureId>;
3535

36-
#[derive(Clone, Copy, Debug)]
36+
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
3737
pub enum CopySide {
3838
Source,
3939
Destination,
@@ -276,9 +276,11 @@ pub(crate) fn extract_texture_selector<T>(
276276
///
277277
/// Copied with some modifications from WebGPU standard.
278278
///
279-
/// If successful, returns a pair `(bytes, stride)`, where:
279+
/// If successful, returns a tuple `(bytes, stride, is_contiguous)`, where:
280280
/// - `bytes` is the number of buffer bytes required for this copy, and
281281
/// - `stride` number of bytes between array layers.
282+
/// - `is_contiguous` is true if the linear texture data does not have padding
283+
/// between rows or between images.
282284
///
283285
/// [vltd]: https://gpuweb.github.io/gpuweb/#abstract-opdef-validating-linear-texture-data
284286
pub(crate) fn validate_linear_texture_data(
@@ -288,7 +290,7 @@ pub(crate) fn validate_linear_texture_data(
288290
buffer_size: BufferAddress,
289291
buffer_side: CopySide,
290292
copy_size: &Extent3d,
291-
) -> Result<(BufferAddress, BufferAddress), TransferError> {
293+
) -> Result<(BufferAddress, BufferAddress, bool), TransferError> {
292294
let wgt::BufferTextureCopyInfo {
293295
copy_width,
294296
copy_height,
@@ -303,14 +305,14 @@ pub(crate) fn validate_linear_texture_data(
303305
width_blocks: _,
304306
height_blocks,
305307

306-
row_bytes_dense: _,
308+
row_bytes_dense,
307309
row_stride_bytes,
308310

309311
image_stride_rows: _,
310312
image_stride_bytes,
311313

312314
image_rows_dense: _,
313-
image_bytes_dense: _,
315+
image_bytes_dense,
314316

315317
bytes_in_copy,
316318
} = layout.get_buffer_texture_copy_info(format, aspect, copy_size)?;
@@ -347,7 +349,10 @@ pub(crate) fn validate_linear_texture_data(
347349
});
348350
}
349351

350-
Ok((bytes_in_copy, image_stride_bytes))
352+
let is_contiguous = (row_stride_bytes == row_bytes_dense || !requires_multiple_rows)
353+
&& (image_stride_bytes == image_bytes_dense || !requires_multiple_images);
354+
355+
Ok((bytes_in_copy, image_stride_bytes, is_contiguous))
351356
}
352357

353358
/// Validate the source format of a texture copy.
@@ -733,6 +738,90 @@ fn handle_dst_texture_init(
733738
Ok(())
734739
}
735740

741+
/// Handle initialization tracking for a transfer's source or destination buffer.
742+
///
743+
/// Ensures that the transfer will not read from uninitialized memory, and updates
744+
/// the initialization state information to reflect the transfer.
745+
fn handle_buffer_init(
746+
cmd_buf_data: &mut CommandBufferMutable,
747+
info: &TexelCopyBufferInfo,
748+
buffer: &Arc<Buffer>,
749+
direction: CopySide,
750+
required_buffer_bytes_in_copy: BufferAddress,
751+
is_contiguous: bool,
752+
) {
753+
const ALIGN_SIZE: BufferAddress = wgt::COPY_BUFFER_ALIGNMENT;
754+
const ALIGN_MASK: BufferAddress = wgt::COPY_BUFFER_ALIGNMENT - 1;
755+
756+
let start = info.layout.offset;
757+
let end = info.layout.offset + required_buffer_bytes_in_copy;
758+
if !is_contiguous || direction == CopySide::Source {
759+
// If the transfer will read the buffer, then the whole region needs to
760+
// be initialized.
761+
//
762+
// If the transfer will not write a contiguous region of the buffer,
763+
// then we need to make sure the padding areas are initialized. For now,
764+
// initialize the whole region, although this could be improved to
765+
// initialize only the necessary parts if doing so is likely to be
766+
// faster than initializing the whole thing.
767+
//
768+
// Adjust the start/end outwards to 4B alignment.
769+
let aligned_start = start & !ALIGN_MASK;
770+
let aligned_end = (end + ALIGN_MASK) & !ALIGN_MASK;
771+
cmd_buf_data.buffer_memory_init_actions.extend(
772+
buffer.initialization_status.read().create_action(
773+
buffer,
774+
aligned_start..aligned_end,
775+
MemoryInitKind::NeedsInitializedMemory,
776+
),
777+
);
778+
} else {
779+
// If the transfer will write a contiguous region of the buffer, then we
780+
// don't need to initialize that region.
781+
//
782+
// However, if the start and end are not 4B aligned, we need to make
783+
// sure that we don't end up trying to initialize non-4B-aligned regions
784+
// later.
785+
//
786+
// Adjust the start/end inwards to 4B alignment, we will handle the
787+
// first/last pieces differently.
788+
let aligned_start = (start + ALIGN_MASK) & !ALIGN_MASK;
789+
let aligned_end = end & !ALIGN_MASK;
790+
if aligned_start != start {
791+
cmd_buf_data.buffer_memory_init_actions.extend(
792+
buffer.initialization_status.read().create_action(
793+
buffer,
794+
aligned_start - ALIGN_SIZE..aligned_start,
795+
MemoryInitKind::NeedsInitializedMemory,
796+
),
797+
);
798+
}
799+
if aligned_start != aligned_end {
800+
cmd_buf_data.buffer_memory_init_actions.extend(
801+
buffer.initialization_status.read().create_action(
802+
buffer,
803+
aligned_start..aligned_end,
804+
MemoryInitKind::ImplicitlyInitialized,
805+
),
806+
);
807+
}
808+
if aligned_end != end {
809+
// It is possible that `aligned_end + ALIGN_SIZE > dst_buffer.size`,
810+
// because `dst_buffer.size` is the user-requested size, not the
811+
// final size of the buffer. The final size of the buffer is not
812+
// readily available, but was rounded up to COPY_BUFFER_ALIGNMENT,
813+
// so no overrun is possible.
814+
cmd_buf_data.buffer_memory_init_actions.extend(
815+
buffer.initialization_status.read().create_action(
816+
buffer,
817+
aligned_end..aligned_end + ALIGN_SIZE,
818+
MemoryInitKind::NeedsInitializedMemory,
819+
),
820+
);
821+
}
822+
}
823+
}
824+
736825
impl Global {
737826
pub fn command_encoder_copy_buffer_to_buffer(
738827
&self,
@@ -1004,7 +1093,7 @@ impl Global {
10041093
true, // alignment required for buffer offset
10051094
)?;
10061095

1007-
let (required_buffer_bytes_in_copy, bytes_per_array_layer) =
1096+
let (required_buffer_bytes_in_copy, bytes_per_array_layer, is_contiguous) =
10081097
validate_linear_texture_data(
10091098
&source.layout,
10101099
dst_texture.desc.format,
@@ -1020,12 +1109,13 @@ impl Global {
10201109
.map_err(TransferError::from)?;
10211110
}
10221111

1023-
cmd_buf_data.buffer_memory_init_actions.extend(
1024-
src_buffer.initialization_status.read().create_action(
1025-
&src_buffer,
1026-
source.layout.offset..(source.layout.offset + required_buffer_bytes_in_copy),
1027-
MemoryInitKind::NeedsInitializedMemory,
1028-
),
1112+
handle_buffer_init(
1113+
cmd_buf_data,
1114+
source,
1115+
&src_buffer,
1116+
CopySide::Source,
1117+
required_buffer_bytes_in_copy,
1118+
is_contiguous,
10291119
);
10301120

10311121
let regions = (0..array_layer_count)
@@ -1124,7 +1214,7 @@ impl Global {
11241214
true, // alignment required for buffer offset
11251215
)?;
11261216

1127-
let (required_buffer_bytes_in_copy, bytes_per_array_layer) =
1217+
let (required_buffer_bytes_in_copy, bytes_per_array_layer, is_contiguous) =
11281218
validate_linear_texture_data(
11291219
&destination.layout,
11301220
src_texture.desc.format,
@@ -1180,13 +1270,13 @@ impl Global {
11801270
let dst_barrier =
11811271
dst_pending.map(|pending| pending.into_hal(&dst_buffer, &snatch_guard));
11821272

1183-
cmd_buf_data.buffer_memory_init_actions.extend(
1184-
dst_buffer.initialization_status.read().create_action(
1185-
&dst_buffer,
1186-
destination.layout.offset
1187-
..(destination.layout.offset + required_buffer_bytes_in_copy),
1188-
MemoryInitKind::ImplicitlyInitialized,
1189-
),
1273+
handle_buffer_init(
1274+
cmd_buf_data,
1275+
destination,
1276+
&dst_buffer,
1277+
CopySide::Destination,
1278+
required_buffer_bytes_in_copy,
1279+
is_contiguous,
11901280
);
11911281

11921282
let regions = (0..array_layer_count)

wgpu-core/src/device/queue.rs

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -763,20 +763,21 @@ impl Queue {
763763
&destination,
764764
dst_base.aspect,
765765
&dst.desc,
766-
&data_layout,
766+
data_layout,
767767
false, // alignment not required for buffer offset or bytes per row
768768
)?;
769769

770770
// Note: `_source_bytes_per_array_layer` is ignored since we
771771
// have a staging copy, and it can have a different value.
772-
let (required_bytes_in_copy, _source_bytes_per_array_layer) = validate_linear_texture_data(
773-
data_layout,
774-
dst.desc.format,
775-
destination.aspect,
776-
data.len() as wgt::BufferAddress,
777-
CopySide::Source,
778-
size,
779-
)?;
772+
let (required_bytes_in_copy, _source_bytes_per_array_layer, _) =
773+
validate_linear_texture_data(
774+
data_layout,
775+
dst.desc.format,
776+
destination.aspect,
777+
data.len() as wgt::BufferAddress,
778+
CopySide::Source,
779+
size,
780+
)?;
780781

781782
if dst.desc.format.is_depth_stencil_format() {
782783
self.device

0 commit comments

Comments
 (0)