Skip to content

Commit 03d355d

Browse files
authored
Pack multiple effects per particle slab (#508)
Sub-allocate each particle slab so that it can hold the particles of multiple effects.
- Introduce a `base_particle` value per effect instance, which is the equivalent of the `base_vertex` for rendering, and corresponds to the index of the first particle in the sub-allocated slice for that effect, inside the overall slab buffer.
- Store that `base_particle` in the `Spawner` and give access to it to all shaders which need it (most of them).
- Restore the default 64k particle count per slab, which allows packing multiple effects per buffer/slab.
When `debug_assertions` is active (in Debug builds), initialize the particle buffer with the bit pattern 0xFFFFFFFF (a `NaN` when read as a float), to make it easier to see in RenderDoc or any other GPU debugger that a particle is unused.
1 parent ce96d01 commit 03d355d

File tree

11 files changed

+185
-97
lines changed

11 files changed

+185
-97
lines changed

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1248,7 +1248,7 @@ fn append_spawn_events_{0}(base_child_index: u32, particle_index: u32, count: u3
12481248
{
12491249
writeln!(
12501250
&mut writeback_code,
1251-
" particle_buffer.particles[particle_index].{0} = particle.{0};",
1251+
" particle_buffer.particles[base_particle + particle_index].{0} = particle.{0};",
12521252
attribute.name()
12531253
)
12541254
.unwrap();

src/render/effect_cache.rs

Lines changed: 32 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,9 @@ impl PartialOrd for EffectSlice {
5656
/// A reference to a slice allocated inside a [`ParticleSlab`].
5757
#[derive(Debug, Default, Clone, PartialEq, Eq)]
5858
pub struct SlabSliceRef {
59-
/// Range into an [`ParticleSlab`], in item count.
59+
/// Range into a [`ParticleSlab`], in item count.
6060
range: Range<u32>,
61+
/// Particle layout for the effect stored in that slice.
6162
pub(crate) particle_layout: ParticleLayout,
6263
}
6364

@@ -223,17 +224,17 @@ pub struct ParticleSlab {
223224

224225
impl ParticleSlab {
225226
/// Minimum buffer capacity to allocate, in number of particles.
226-
// FIXME - Batching is broken due to binding a single GpuSpawnerParam instead of
227-
// N, and inability for a particle index to tell which Spawner it should
228-
// use. Setting this to 1 effectively ensures that all new buffers just fit
229-
// the effect, so batching never occurs.
230-
pub const MIN_CAPACITY: u32 = 1; // 65536; // at least 64k particles
227+
pub const MIN_CAPACITY: u32 = 65536; // at least 64k particles
231228

232229
/// Create a new slab and the GPU resources to back it up.
233230
///
234231
/// The slab cannot contain less than [`MIN_CAPACITY`] particles. If the
235232
/// input `capacity` is smaller, it's rounded up to [`MIN_CAPACITY`].
236233
///
234+
/// # Panics
235+
///
236+
/// This panics if the `capacity` is zero.
237+
///
237238
/// [`MIN_CAPACITY`]: Self::MIN_CAPACITY
238239
pub fn new(
239240
slab_id: SlabId,
@@ -252,31 +253,47 @@ impl ParticleSlab {
252253

253254
// Calculate the clamped capacity of the group, in number of particles.
254255
let capacity = capacity.max(Self::MIN_CAPACITY);
255-
debug_assert!(
256+
assert!(
256257
capacity > 0,
257258
"Attempted to create a zero-sized effect buffer."
258259
);
259260

260261
// Allocate the particle buffer itself, containing the attributes of each
261262
// particle.
263+
#[cfg(debug_assertions)]
264+
let mapped_at_creation = true;
265+
#[cfg(not(debug_assertions))]
266+
let mapped_at_creation = false;
262267
let particle_capacity_bytes: BufferAddress =
263268
capacity as u64 * particle_layout.min_binding_size().get();
264269
let particle_label = format!("hanabi:buffer:slab{}:particle", slab_id.0);
265270
let particle_buffer = render_device.create_buffer(&BufferDescriptor {
266271
label: Some(&particle_label),
267272
size: particle_capacity_bytes,
268273
usage: BufferUsages::COPY_DST | BufferUsages::STORAGE,
269-
mapped_at_creation: false,
274+
mapped_at_creation,
270275
});
276+
// Set content
277+
#[cfg(debug_assertions)]
278+
{
279+
// Scope get_mapped_range_mut() to force a drop before unmap()
280+
{
281+
let slice: &mut [u8] = &mut particle_buffer
282+
.slice(..particle_capacity_bytes)
283+
.get_mapped_range_mut();
284+
let slice: &mut [u32] = cast_slice_mut(slice);
285+
slice.fill(0xFFFFFFFF);
286+
}
287+
particle_buffer.unmap();
288+
}
271289

272290
// Each indirect buffer stores 3 arrays of u32, of length the number of
273291
// particles.
274-
let capacity_bytes: BufferAddress = capacity as u64 * 4 * 3;
275-
292+
let indirect_capacity_bytes: BufferAddress = capacity as u64 * 4 * 3;
276293
let indirect_label = format!("hanabi:buffer:slab{}:indirect", slab_id.0);
277294
let indirect_index_buffer = render_device.create_buffer(&BufferDescriptor {
278295
label: Some(&indirect_label),
279-
size: capacity_bytes,
296+
size: indirect_capacity_bytes,
280297
usage: BufferUsages::COPY_DST | BufferUsages::STORAGE,
281298
mapped_at_creation: true,
282299
});
@@ -285,11 +302,11 @@ impl ParticleSlab {
285302
// Scope get_mapped_range_mut() to force a drop before unmap()
286303
{
287304
let slice: &mut [u8] = &mut indirect_index_buffer
288-
.slice(..capacity_bytes)
305+
.slice(..indirect_capacity_bytes)
289306
.get_mapped_range_mut();
290307
let slice: &mut [u32] = cast_slice_mut(slice);
291308
for index in 0..capacity {
292-
slice[3 * index as usize + 2] = capacity - 1 - index;
309+
slice[3 * index as usize + 2] = index;
293310
}
294311
}
295312
indirect_index_buffer.unmap();
@@ -337,11 +354,11 @@ impl ParticleSlab {
337354
},
338355
];
339356
let label = format!(
340-
"hanabi:bind_group_layout:render:particles@1:vfx{}",
357+
"hanabi:bind_group_layout:render:particles@1:slab{}",
341358
slab_id.0
342359
);
343360
trace!(
344-
"Creating render layout '{}' with {} entries",
361+
"Creating particles@1 layout '{}' for render pass with {} entries",
345362
label,
346363
entries.len(),
347364
);
@@ -378,16 +395,6 @@ impl ParticleSlab {
378395
&self.indirect_index_buffer
379396
}
380397

381-
#[inline]
382-
pub fn particle_offset(&self, row: u32) -> u32 {
383-
self.particle_layout.min_binding_size().get() as u32 * row
384-
}
385-
386-
#[inline]
387-
pub fn indirect_index_offset(&self, row: u32) -> u32 {
388-
row * 12
389-
}
390-
391398
/// Return a binding for the entire particle buffer.
392399
pub fn as_entire_binding_particle(&self) -> BindingResource<'_> {
393400
let capacity_bytes = self.capacity as u64 * self.particle_layout.min_binding_size().get();

src/render/event.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,8 @@ pub(crate) struct CachedParentInfo {
166166
pub(crate) struct CachedChildInfo {
167167
/// ID of the slab storing the parent effect.
168168
pub parent_slab_id: SlabId,
169+
/// Offset into the slab of the parent's particles.
170+
pub parent_slab_offset: u32,
169171
/// Parent's particle layout.
170172
pub parent_particle_layout: ParticleLayout,
171173
/// Parent's buffer.
@@ -186,6 +188,7 @@ pub(crate) struct CachedChildInfo {
186188
impl CachedChildInfo {
187189
pub fn is_locally_equal(&self, other: &CachedChildInfo) -> bool {
188190
self.parent_slab_id == other.parent_slab_id
191+
&& self.parent_slab_offset == other.parent_slab_offset
189192
&& self.parent_particle_layout == other.parent_particle_layout
190193
&& self.parent_buffer_binding_source == other.parent_buffer_binding_source
191194
&& self.local_child_index == other.local_child_index

src/render/mod.rs

Lines changed: 34 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,13 @@ pub(crate) struct GpuSpawnerParams {
361361
/// Index of the [`GpuDrawIndirect`] or [`GpuDrawIndexedIndirect`] for this
362362
/// effect.
363363
draw_indirect_index: u32,
364+
/// Start offset of the particles and indirect indices into the effect's
365+
/// slab, in number of particles (row index).
366+
slab_offset: u32,
367+
/// Start offset of the particles and indirect indices into the parent effect's
368+
/// slab (if the effect has a parent effect), in number of particles (row index).
369+
/// This is ignored if the effect has no parent.
370+
parent_slab_offset: u32,
364371
}
365372

366373
/// GPU representation of an indirect compute dispatch input.
@@ -2933,6 +2940,8 @@ impl EffectsMeta {
29332940
global_transform: &GlobalTransform,
29342941
spawn_count: u32,
29352942
prng_seed: u32,
2943+
slab_offset: u32,
2944+
parent_slab_offset: Option<u32>,
29362945
effect_metadata_buffer_table_id: BufferTableId,
29372946
maybe_cached_draw_indirect_args: Option<&CachedDrawIndirectArgs>,
29382947
) -> u32 {
@@ -2953,6 +2962,8 @@ impl EffectsMeta {
29532962
draw_indirect_index: maybe_cached_draw_indirect_args
29542963
.map(|cdia| cdia.get_row().0)
29552964
.unwrap_or_default(),
2965+
slab_offset,
2966+
parent_slab_offset: parent_slab_offset.unwrap_or(u32::MAX),
29562967
..default()
29572968
};
29582969
trace!("spawner params = {:?}", spawner_params);
@@ -3539,6 +3550,7 @@ pub fn allocate_parent_child_infos(
35393550

35403551
let new_cached_child_info = CachedChildInfo {
35413552
parent_slab_id: parent_cached_effect.slab_id,
3553+
parent_slab_offset: parent_cached_effect.slice.range().start,
35423554
parent_particle_layout: parent_cached_effect.slice.particle_layout.clone(),
35433555
parent_buffer_binding_source,
35443556
local_child_index,
@@ -4422,11 +4434,15 @@ pub(crate) fn prepare_batch_inputs(
44224434
trace!("layout_flags = {:?}", extracted_effect.layout_flags);
44234435
trace!("particle_layout = {:?}", effect_slice.particle_layout);
44244436

4437+
let parent_slab_offset = maybe_cached_child_info.map(|cci| cci.parent_slab_offset);
4438+
44254439
assert!(cached_effect_metadata.table_id.is_valid());
44264440
let spawner_index = effects_meta.allocate_spawner(
44274441
&extracted_spawner.transform,
44284442
extracted_spawner.spawn_count,
44294443
extracted_spawner.prng_seed,
4444+
cached_effect.slice.range().start,
4445+
parent_slab_offset,
44304446
cached_effect_metadata.table_id,
44314447
maybe_cached_draw_indirect_args,
44324448
);
@@ -6410,6 +6426,7 @@ pub(crate) fn prepare_bind_groups(
64106426
particle_buffer,
64116427
indirect_index_buffer,
64126428
effect_metadata_buffer,
6429+
&spawner_buffer,
64136430
) {
64146431
error!(
64156432
"Failed to create sort-fill bind group @0 for ribbon effect: {:?}",
@@ -6420,9 +6437,11 @@ pub(crate) fn prepare_bind_groups(
64206437

64216438
// Bind group @0 of sort-copy pass
64226439
let indirect_index_buffer = effect_buffer.indirect_index_buffer();
6423-
if let Err(err) = sort_bind_groups
6424-
.ensure_sort_copy_bind_group(indirect_index_buffer, effect_metadata_buffer)
6425-
{
6440+
if let Err(err) = sort_bind_groups.ensure_sort_copy_bind_group(
6441+
indirect_index_buffer,
6442+
effect_metadata_buffer,
6443+
&spawner_buffer,
6444+
) {
64266445
error!(
64276446
"Failed to create sort-copy bind group @0 for ribbon effect: {:?}",
64286447
err
@@ -7343,6 +7362,11 @@ impl Node for VfxSimulateNode {
73437362
return Ok(());
73447363
}
73457364

7365+
let spawner_base = effect_batch.spawner_base;
7366+
let spawner_aligned_size = effects_meta.spawner_buffer.aligned_size();
7367+
assert!(spawner_aligned_size >= GpuSpawnerParams::min_size().get() as usize);
7368+
let spawner_offset = spawner_base * spawner_aligned_size as u32;
7369+
73467370
// Bind group sort_fill@0
73477371
let particle_buffer = effect_buffer.particle_buffer();
73487372
let indirect_index_buffer = effect_buffer.indirect_index_buffer();
@@ -7355,21 +7379,14 @@ impl Node for VfxSimulateNode {
73557379
compute_pass.insert_debug_marker("ERROR:MissingSortFillBindGroup");
73567380
continue;
73577381
};
7358-
let particle_offset = effect_buffer.particle_offset(effect_batch.slice.start);
7359-
let indirect_index_offset =
7360-
effect_buffer.indirect_index_offset(effect_batch.slice.start);
73617382
let effect_metadata_offset = effects_meta
73627383
.gpu_limits
73637384
.effect_metadata_offset(effect_batch.metadata_table_id.0)
73647385
as u32;
73657386
compute_pass.set_bind_group(
73667387
0,
73677388
bind_group,
7368-
&[
7369-
particle_offset,
7370-
indirect_index_offset,
7371-
effect_metadata_offset,
7372-
],
7389+
&[effect_metadata_offset, spawner_offset],
73737390
);
73747391

73757392
compute_pass
@@ -7418,6 +7435,11 @@ impl Node for VfxSimulateNode {
74187435
return Ok(());
74197436
}
74207437

7438+
let spawner_base = effect_batch.spawner_base;
7439+
let spawner_aligned_size = effects_meta.spawner_buffer.aligned_size();
7440+
assert!(spawner_aligned_size >= GpuSpawnerParams::min_size().get() as usize);
7441+
let spawner_offset = spawner_base * spawner_aligned_size as u32;
7442+
74217443
// Bind group sort_copy@0
74227444
let indirect_index_buffer = effect_buffer.indirect_index_buffer();
74237445
let Some(bind_group) = sort_bind_groups.sort_copy_bind_group(
@@ -7428,14 +7450,13 @@ impl Node for VfxSimulateNode {
74287450
compute_pass.insert_debug_marker("ERROR:MissingSortCopyBindGroup");
74297451
continue;
74307452
};
7431-
let indirect_index_offset = effect_batch.slice.start;
74327453
let effect_metadata_offset = effects_meta
74337454
.effect_metadata_buffer
74347455
.dynamic_offset(effect_batch.metadata_table_id);
74357456
compute_pass.set_bind_group(
74367457
0,
74377458
bind_group,
7438-
&[indirect_index_offset, effect_metadata_offset],
7459+
&[effect_metadata_offset, spawner_offset],
74397460
);
74407461

74417462
compute_pass

0 commit comments

Comments
 (0)