Skip to content

Commit 1e7b89c

Browse files
authored
Allow holes in the MeshInputUniform buffer. (bevyengine#16695)
This commit removes the logic that attempted to keep the `MeshInputUniform` buffer contiguous. Not only was it slow and complex, but it was also incorrect, which caused bevyengine#16686 and bevyengine#16690. I changed the logic to simply maintain a free list of unused slots in the buffer and preferentially fill them when pushing new mesh input uniforms. Closes bevyengine#16686. Closes bevyengine#16690.
1 parent 61b98ec commit 1e7b89c

File tree

4 files changed

+79
-112
lines changed

4 files changed

+79
-112
lines changed

crates/bevy_pbr/src/render/gpu_preprocess.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -402,8 +402,8 @@ pub fn prepare_preprocess_bind_groups(
402402
} = batched_instance_buffers.into_inner();
403403

404404
let (Some(current_input_buffer), Some(previous_input_buffer), Some(data_buffer)) = (
405-
current_input_buffer_vec.buffer.buffer(),
406-
previous_input_buffer_vec.buffer.buffer(),
405+
current_input_buffer_vec.buffer().buffer(),
406+
previous_input_buffer_vec.buffer().buffer(),
407407
data_buffer_vec.buffer(),
408408
) else {
409409
return;

crates/bevy_pbr/src/render/mesh.rs

Lines changed: 19 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -652,22 +652,6 @@ pub struct RenderMeshInstancesCpu(MainEntityHashMap<RenderMeshInstanceCpu>);
652652
#[derive(Default, Deref, DerefMut)]
653653
pub struct RenderMeshInstancesGpu(MainEntityHashMap<RenderMeshInstanceGpu>);
654654

655-
/// The result of an entity removal.
656-
///
657-
/// We want the mesh input uniform array to have no gaps in it in order to
658-
/// simplify the GPU logic. Therefore, whenever entity data is removed from the
659-
/// [`MeshInputUniform`] buffer, the last entity is swapped in to fill it. If
660-
/// culling is enabled, the mesh culling data corresponding to an entity must
661-
/// have the same indices as the mesh input uniforms for that entity. Thus we
662-
/// need to pass this information to [`MeshCullingDataBuffer::remove`] so that
663-
/// it can update its buffer to match the [`MeshInputUniform`] buffer.
664-
struct RemovedMeshInputUniformIndices {
665-
/// The index of the mesh input that was removed.
666-
removed_index: usize,
667-
/// The index of the mesh input that was moved to fill its place.
668-
moved_index: usize,
669-
}
670-
671655
/// Maps each mesh instance to the material ID, and allocated binding ID,
672656
/// associated with that mesh instance.
673657
#[derive(Resource, Default)]
@@ -896,7 +880,7 @@ impl RenderMeshInstanceGpuBuilder {
896880
current_input_buffer: &mut InstanceInputUniformBuffer<MeshInputUniform>,
897881
previous_input_buffer: &mut InstanceInputUniformBuffer<MeshInputUniform>,
898882
mesh_allocator: &MeshAllocator,
899-
) -> usize {
883+
) -> u32 {
900884
let first_vertex_index = match mesh_allocator.mesh_vertex_slice(&self.shared.mesh_asset_id)
901885
{
902886
Some(mesh_vertex_slice) => mesh_vertex_slice.range.start,
@@ -922,39 +906,33 @@ impl RenderMeshInstanceGpuBuilder {
922906
// Yes, it did. Replace its entry with the new one.
923907

924908
// Reserve a slot.
925-
current_uniform_index =
926-
u32::from(occupied_entry.get_mut().current_uniform_index) as usize;
909+
current_uniform_index = u32::from(occupied_entry.get_mut().current_uniform_index);
927910

928911
// Save the old mesh input uniform. The mesh preprocessing
929912
// shader will need it to compute motion vectors.
930-
let previous_mesh_input_uniform =
931-
current_input_buffer.buffer.values()[current_uniform_index];
932-
let previous_input_index = previous_input_buffer
933-
.buffer
934-
.push(previous_mesh_input_uniform);
935-
previous_input_buffer.main_entities.push(entity);
936-
mesh_input_uniform.previous_input_index = previous_input_index as u32;
913+
let previous_mesh_input_uniform = current_input_buffer.get(current_uniform_index);
914+
let previous_input_index = previous_input_buffer.add(previous_mesh_input_uniform);
915+
mesh_input_uniform.previous_input_index = previous_input_index;
937916

938917
// Write in the new mesh input uniform.
939-
current_input_buffer.buffer.values_mut()[current_uniform_index] =
940-
mesh_input_uniform;
918+
current_input_buffer.set(current_uniform_index, mesh_input_uniform);
941919

942920
occupied_entry.replace_entry(RenderMeshInstanceGpu {
943921
translation: self.world_from_local.translation,
944922
shared: self.shared,
945-
current_uniform_index: NonMaxU32::new(current_uniform_index as u32)
923+
current_uniform_index: NonMaxU32::new(current_uniform_index)
946924
.unwrap_or_default(),
947925
});
948926
}
949927

950928
Entry::Vacant(vacant_entry) => {
951929
// No, this is a new entity. Push its data on to the buffer.
952-
current_uniform_index = current_input_buffer.buffer.push(mesh_input_uniform);
953-
current_input_buffer.main_entities.push(entity);
930+
current_uniform_index = current_input_buffer.add(mesh_input_uniform);
931+
954932
vacant_entry.insert(RenderMeshInstanceGpu {
955933
translation: self.world_from_local.translation,
956934
shared: self.shared,
957-
current_uniform_index: NonMaxU32::new(current_uniform_index as u32)
935+
current_uniform_index: NonMaxU32::new(current_uniform_index)
958936
.unwrap_or_default(),
959937
});
960938
}
@@ -970,34 +948,13 @@ fn remove_mesh_input_uniform(
970948
entity: MainEntity,
971949
render_mesh_instances: &mut MainEntityHashMap<RenderMeshInstanceGpu>,
972950
current_input_buffer: &mut InstanceInputUniformBuffer<MeshInputUniform>,
973-
) -> Option<RemovedMeshInputUniformIndices> {
951+
) -> Option<u32> {
974952
// Remove the uniform data.
975953
let removed_render_mesh_instance = render_mesh_instances.remove(&entity)?;
976-
let removed_uniform_index = removed_render_mesh_instance.current_uniform_index.get() as usize;
977-
978-
// Now *move* the final mesh uniform to fill its place in the buffer, so
979-
// that the buffer remains contiguous.
980-
let moved_uniform_index = current_input_buffer.buffer.len() - 1;
981-
if moved_uniform_index != removed_uniform_index {
982-
let moved_uniform = current_input_buffer.buffer.pop().unwrap();
983-
let moved_entity = current_input_buffer.main_entities.pop().unwrap();
984-
985-
current_input_buffer.buffer.values_mut()[removed_uniform_index] = moved_uniform;
986-
987-
if let Some(moved_render_mesh_instance) = render_mesh_instances.get_mut(&moved_entity) {
988-
moved_render_mesh_instance.current_uniform_index =
989-
NonMaxU32::new(removed_uniform_index as u32).unwrap_or_default();
990-
}
991-
} else {
992-
current_input_buffer.buffer.pop();
993-
current_input_buffer.main_entities.pop();
994-
}
995954

996-
// Tell our caller what we did.
997-
Some(RemovedMeshInputUniformIndices {
998-
removed_index: removed_uniform_index,
999-
moved_index: moved_uniform_index,
1000-
})
955+
let removed_uniform_index = removed_render_mesh_instance.current_uniform_index.get();
956+
current_input_buffer.remove(removed_uniform_index);
957+
Some(removed_uniform_index)
1001958
}
1002959

1003960
impl MeshCullingData {
@@ -1039,26 +996,6 @@ impl Default for MeshCullingDataBuffer {
1039996
}
1040997
}
1041998

1042-
impl MeshCullingDataBuffer {
1043-
/// Updates the culling data buffer after a mesh has been removed from the scene.
1044-
///
1045-
/// [`remove_mesh_input_uniform`] removes a mesh input uniform from the
1046-
/// buffer, swapping in the last uniform to fill the space if necessary. The
1047-
/// index of each mesh in the mesh culling data and that index of the mesh
1048-
/// in the mesh input uniform buffer must match. Thus we have to perform the
1049-
/// corresponding operation here too. [`remove_mesh_input_uniform`] returns
1050-
/// a [`RemovedMeshInputUniformIndices`] value to tell us what to do, and
1051-
/// this function is where we process that value.
1052-
fn remove(&mut self, removed_indices: RemovedMeshInputUniformIndices) {
1053-
if removed_indices.moved_index != removed_indices.removed_index {
1054-
let moved_culling_data = self.pop().unwrap();
1055-
self.values_mut()[removed_indices.removed_index] = moved_culling_data;
1056-
} else {
1057-
self.pop();
1058-
}
1059-
}
1060-
}
1061-
1062999
/// Data that [`crate::material::queue_material_meshes`] and similar systems
10631000
/// need in order to place entities that contain meshes in the right batch.
10641001
#[derive(Deref)]
@@ -1434,31 +1371,23 @@ pub fn collect_meshes_for_gpu_building(
14341371
previous_input_buffer,
14351372
&mesh_allocator,
14361373
);
1437-
mesh_culling_builder.update(&mut mesh_culling_data_buffer, instance_data_index);
1374+
mesh_culling_builder
1375+
.update(&mut mesh_culling_data_buffer, instance_data_index as usize);
14381376
}
14391377

14401378
for entity in removed.drain(..) {
1441-
if let Some(removed_indices) = remove_mesh_input_uniform(
1379+
remove_mesh_input_uniform(
14421380
entity,
14431381
&mut *render_mesh_instances,
14441382
current_input_buffer,
1445-
) {
1446-
mesh_culling_data_buffer.remove(removed_indices);
1447-
}
1383+
);
14481384
}
14491385
}
14501386
}
14511387
}
14521388

14531389
// Buffers can't be empty. Make sure there's something in the previous input buffer.
1454-
if previous_input_buffer.buffer.is_empty() {
1455-
previous_input_buffer
1456-
.buffer
1457-
.push(MeshInputUniform::default());
1458-
previous_input_buffer
1459-
.main_entities
1460-
.push(Entity::PLACEHOLDER.into());
1461-
}
1390+
previous_input_buffer.ensure_nonempty();
14621391
}
14631392

14641393
/// All data needed to construct a pipeline for rendering 3D meshes.

crates/bevy_render/src/batching/gpu_preprocessing.rs

Lines changed: 57 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use bevy_ecs::{
1010
world::{FromWorld, World},
1111
};
1212
use bevy_encase_derive::ShaderType;
13-
use bevy_utils::tracing::error;
13+
use bevy_utils::{default, tracing::error};
1414
use bytemuck::{Pod, Zeroable};
1515
use nonmax::NonMaxU32;
1616
use wgpu::{BindingResource, BufferUsages, DownlevelFlags, Features};
@@ -24,7 +24,6 @@ use crate::{
2424
},
2525
render_resource::{BufferVec, GpuArrayBufferable, RawBufferVec, UninitBufferVec},
2626
renderer::{RenderAdapter, RenderDevice, RenderQueue},
27-
sync_world::MainEntity,
2827
view::{ExtractedView, GpuCulling, ViewTarget},
2928
Render, RenderApp, RenderSet,
3029
};
@@ -125,7 +124,7 @@ pub enum GpuPreprocessingMode {
125124
pub struct BatchedInstanceBuffers<BD, BDI>
126125
where
127126
BD: GpuArrayBufferable + Sync + Send + 'static,
128-
BDI: Pod,
127+
BDI: Pod + Default,
129128
{
130129
/// A storage area for the buffer data that the GPU compute shader is
131130
/// expected to write to.
@@ -161,44 +160,83 @@ where
161160
/// shader is expected to expand to the full *buffer data* type.
162161
pub struct InstanceInputUniformBuffer<BDI>
163162
where
164-
BDI: Pod,
163+
BDI: Pod + Default,
165164
{
166165
/// The buffer containing the data that will be uploaded to the GPU.
167-
pub buffer: RawBufferVec<BDI>,
166+
buffer: RawBufferVec<BDI>,
168167

169-
/// The main-world entity that each item in
170-
/// [`InstanceInputUniformBuffer::buffer`] corresponds to.
168+
/// Indices of slots that are free within the buffer.
171169
///
172-
/// This array must have the same length as
173-
/// [`InstanceInputUniformBuffer::buffer`]. This bookkeeping is needed when
174-
/// moving the data for an entity in the buffer (which happens when an
175-
/// entity is removed), so that we can update other data structures with the
176-
/// new buffer index of that entity.
177-
pub main_entities: Vec<MainEntity>,
170+
/// When adding data, we preferentially overwrite these slots first before
171+
/// growing the buffer itself.
172+
free_uniform_indices: Vec<u32>,
178173
}
179174

180175
impl<BDI> InstanceInputUniformBuffer<BDI>
181176
where
182-
BDI: Pod,
177+
BDI: Pod + Default,
183178
{
184179
/// Creates a new, empty buffer.
185180
pub fn new() -> InstanceInputUniformBuffer<BDI> {
186181
InstanceInputUniformBuffer {
187182
buffer: RawBufferVec::new(BufferUsages::STORAGE),
188-
main_entities: vec![],
183+
free_uniform_indices: vec![],
189184
}
190185
}
191186

192187
/// Clears the buffer and entity list out.
193188
pub fn clear(&mut self) {
194189
self.buffer.clear();
195-
self.main_entities.clear();
190+
self.free_uniform_indices.clear();
191+
}
192+
193+
/// Returns the [`RawBufferVec`] corresponding to this input uniform buffer.
194+
#[inline]
195+
pub fn buffer(&self) -> &RawBufferVec<BDI> {
196+
&self.buffer
197+
}
198+
199+
/// Adds a new piece of buffered data to the uniform buffer and returns its
200+
/// index.
201+
pub fn add(&mut self, element: BDI) -> u32 {
202+
match self.free_uniform_indices.pop() {
203+
Some(uniform_index) => {
204+
self.buffer.values_mut()[uniform_index as usize] = element;
205+
uniform_index
206+
}
207+
None => self.buffer.push(element) as u32,
208+
}
209+
}
210+
211+
/// Removes a piece of buffered data from the uniform buffer.
212+
///
213+
/// This simply marks the data as free.
214+
pub fn remove(&mut self, uniform_index: u32) {
215+
self.free_uniform_indices.push(uniform_index);
216+
}
217+
218+
/// Returns the piece of buffered data at the given index.
219+
pub fn get(&self, uniform_index: u32) -> BDI {
220+
self.buffer.values()[uniform_index as usize]
221+
}
222+
223+
/// Stores a piece of buffered data at the given index.
224+
pub fn set(&mut self, uniform_index: u32, element: BDI) {
225+
self.buffer.values_mut()[uniform_index as usize] = element;
226+
}
227+
228+
// Ensures that the buffers are nonempty, which the GPU requires before an
229+
// upload can take place.
230+
pub fn ensure_nonempty(&mut self) {
231+
if self.buffer.is_empty() {
232+
self.buffer.push(default());
233+
}
196234
}
197235
}
198236

199237
impl<BDI> Default for InstanceInputUniformBuffer<BDI>
200238
where
201-
BDI: Pod,
239+
BDI: Pod + Default,
202240
{
203241
fn default() -> Self {
204242
Self::new()
@@ -360,7 +398,7 @@ impl FromWorld for GpuPreprocessingSupport {
360398
impl<BD, BDI> BatchedInstanceBuffers<BD, BDI>
361399
where
362400
BD: GpuArrayBufferable + Sync + Send + 'static,
363-
BDI: Pod,
401+
BDI: Pod + Default,
364402
{
365403
/// Creates new buffers.
366404
pub fn new() -> Self {
@@ -393,7 +431,7 @@ where
393431
impl<BD, BDI> Default for BatchedInstanceBuffers<BD, BDI>
394432
where
395433
BD: GpuArrayBufferable + Sync + Send + 'static,
396-
BDI: Pod,
434+
BDI: Pod + Default,
397435
{
398436
fn default() -> Self {
399437
Self::new()

crates/bevy_render/src/batching/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ pub trait GetBatchData {
104104
pub trait GetFullBatchData: GetBatchData {
105105
/// The per-instance data that was inserted into the
106106
/// [`crate::render_resource::BufferVec`] during extraction.
107-
type BufferInputData: Pod + Sync + Send;
107+
type BufferInputData: Pod + Default + Sync + Send;
108108

109109
/// Get the per-instance data to be inserted into the
110110
/// [`crate::render_resource::GpuArrayBuffer`].

0 commit comments

Comments
 (0)