Skip to content

Commit ce96d01

Browse files
authored
Directly use DispatchIndirectArgs in shaders (#506)
Switch to directly using an `array<DispatchIndirectArgs>` in the `vfx_indirect` compute pass, instead of a `u32` array, since those structs are already tightly packed. This makes the shader more readable.
1 parent 1968938 commit ce96d01

File tree

3 files changed

+11
-17
lines changed

3 files changed

+11
-17
lines changed

src/render/mod.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2885,10 +2885,10 @@ impl EffectsMeta {
28852885

28862886
// Ensure individual GpuSpawnerParams elements are properly aligned so they can
28872887
// be addressed individually by the compute shaders.
2888-
let item_align = gpu_limits.storage_buffer_align().get() as u64;
2888+
let item_align = gpu_limits.storage_buffer_align();
28892889
trace!(
28902890
"Aligning storage buffers to {} bytes as device limits requires.",
2891-
item_align
2891+
item_align.get()
28922892
);
28932893

28942894
Self {
@@ -2900,7 +2900,7 @@ impl EffectsMeta {
29002900
sim_params_uniforms: UniformBuffer::default(),
29012901
spawner_buffer: AlignedBufferVec::new(
29022902
BufferUsages::STORAGE,
2903-
NonZeroU64::new(item_align),
2903+
Some(item_align.into()),
29042904
Some("hanabi:buffer:spawner".to_string()),
29052905
),
29062906
dispatch_indirect_buffer: GpuBuffer::new(
@@ -2914,7 +2914,7 @@ impl EffectsMeta {
29142914
),
29152915
effect_metadata_buffer: BufferTable::new(
29162916
BufferUsages::STORAGE | BufferUsages::INDIRECT,
2917-
Some(NonZeroU64::new(item_align).unwrap()),
2917+
Some(item_align.into()),
29182918
Some("hanabi:buffer:effect_metadata".to_string()),
29192919
),
29202920
gpu_limits,
@@ -6180,7 +6180,7 @@ pub(crate) fn prepare_bind_groups(
61806180
resource: effect_metadata_buffer.as_entire_binding(),
61816181
},
61826182
// @group(1) @binding(1) var<storage, read_write> dispatch_indirect_buffer
6183-
// : array<u32>;
6183+
// : array<DispatchIndirectArgs>;
61846184
BindGroupEntry {
61856185
binding: 1,
61866186
resource: dispatch_indirect_buffer.as_entire_binding(),

src/render/vfx_common.wgsl

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -110,9 +110,6 @@ struct DispatchIndirectArgs {
110110
z: u32,
111111
}
112112

113-
/// Stride in u32 count (4 bytes) of the DispatchIndirectArgs struct.
114-
const DISPATCH_INDIRECT_STRIDE: u32 = 3u;
115-
116113
/// Indirect draw (non-indexed) dispatch struct for GPU-driven passes. The layout of this struct is dictated by WGSL.
117114
/// See https://docs.rs/wgpu/latest/wgpu/util/struct.DrawIndirectArgs.html.
118115
struct DrawIndirectArgs {
@@ -216,7 +213,6 @@ struct EffectMetadata {
216213
/// of an array, or sometimes individually as a single unit. In the latter case,
217214
/// we need it to be aligned to the GPU limits of the device. That limit is only
218215
/// known at runtime when initializing the WebGPU device.
219-
// FIXME - not anymore, but would be again with proper batching, so keep for now
220216
{{EFFECT_METADATA_PADDING}}
221217
}
222218

src/render/vfx_indirect.wgsl

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,15 @@
11
#import bevy_hanabi::vfx_common::{
2-
ChildInfo, ChildInfoBuffer, SimParams, Spawner,
2+
ChildInfo, ChildInfoBuffer, DispatchIndirectArgs, SimParams, Spawner,
33
EM_OFFSET_ALIVE_COUNT, EM_OFFSET_MAX_UPDATE, EM_OFFSET_CAPACITY,
44
EM_OFFSET_MAX_SPAWN, EM_OFFSET_INDIRECT_DISPATCH_INDEX, DRAW_INDEXED_INDIRECT_STRIDE,
5-
EM_OFFSET_INDIRECT_WRITE_INDEX, DISPATCH_INDIRECT_STRIDE, EFFECT_METADATA_STRIDE
5+
EM_OFFSET_INDIRECT_WRITE_INDEX, EFFECT_METADATA_STRIDE
66
}
77

88
@group(0) @binding(0) var<uniform> sim_params : SimParams;
99

1010
// Tightly packed array of EffectMetadata[], accessed as u32 array.
1111
@group(1) @binding(0) var<storage, read_write> effect_metadata_buffer : array<u32>;
12-
// Tightly packed array of DispatchIndirectArgs[], accessed as u32 array.
13-
@group(1) @binding(1) var<storage, read_write> dispatch_indirect_buffer : array<u32>;
12+
@group(1) @binding(1) var<storage, read_write> dispatch_indirect_buffer : array<DispatchIndirectArgs>;
1413
// Tightly packed array of DrawIndexedIndirectArgs[], accessed as u32 array. This can contain
1514
// some DrawIndirectArgs[] instead, but in that case the stride is adjusted so all rows have
1615
// the same size. Since we access the instance_count, which is at the same position in both,
@@ -70,10 +69,9 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {
7069
// Calculate the number of workgroups (thread groups) to dispatch for the update
7170
// pass, which is the number of alive particles divided by 64 (workgroup_size), rounded up.
7271
let indirect_dispatch_index = effect_metadata_buffer[em_base + EM_OFFSET_INDIRECT_DISPATCH_INDEX];
73-
let di_base = DISPATCH_INDIRECT_STRIDE * indirect_dispatch_index;
74-
dispatch_indirect_buffer[di_base] = (alive_count + 63u) >> 6u;
75-
dispatch_indirect_buffer[di_base + 1u] = 1u;
76-
dispatch_indirect_buffer[di_base + 2u] = 1u;
72+
dispatch_indirect_buffer[indirect_dispatch_index].x = (alive_count + 63u) >> 6u;
73+
dispatch_indirect_buffer[indirect_dispatch_index].y = 1u;
74+
dispatch_indirect_buffer[indirect_dispatch_index].z = 1u;
7775

7876
// Swap ping/pong buffers. The update pass always writes into ping, and both the update
7977
// pass and the render pass always read from pong.

0 commit comments

Comments
 (0)