Skip to content

Commit a1a81e5

Browse files
authored
Parallelize extract_meshes (#9966)
# Objective

`extract_meshes` can easily be one of the most expensive operations in the blocking extract schedule for 3D apps. It also has no fundamentally serialized parts and can easily be run across multiple threads. Let's speed it up by parallelizing it!

## Solution

Use the `ThreadLocal<Cell<Vec<T>>>` approach utilized by #7348 in conjunction with `Query::par_iter` to build a set of thread-local queues, and collect them after going wide.

## Performance

Using `cargo run --profile stress-test --features trace_tracy --example many_cubes`. Yellow is this PR. Red is main.

`extract_meshes`:

![image](https://github.com/bevyengine/bevy/assets/3137680/9d45aa2e-3cfa-4fad-9c08-53498b51a73b)

An average reduction from 1.2ms to 770us is seen, a 41.6% improvement.

Note: this is still not including #9950's changes, so this may actually result in even greater speedups once that's merged in.
1 parent 1d7577f commit a1a81e5

File tree

2 files changed

+52
-40
lines changed

2 files changed

+52
-40
lines changed

crates/bevy_pbr/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,4 @@ bytemuck = { version = "1", features = ["derive"] }
3333
naga_oil = "0.8"
3434
radsort = "0.1"
3535
smallvec = "1.6"
36+
thread_local = "1.0"

crates/bevy_pbr/src/render/mesh.rs

Lines changed: 51 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ use bevy_render::{
4545
};
4646
use bevy_transform::components::GlobalTransform;
4747
use bevy_utils::{tracing::error, EntityHashMap, HashMap, Hashed};
48+
use std::cell::Cell;
49+
use thread_local::ThreadLocal;
4850

4951
use crate::render::{
5052
morph::{
@@ -246,6 +248,7 @@ pub fn extract_meshes(
246248
mut commands: Commands,
247249
mut previous_len: Local<usize>,
248250
mut render_mesh_instances: ResMut<RenderMeshInstances>,
251+
mut thread_local_queues: Local<ThreadLocal<Cell<Vec<(Entity, RenderMeshInstance)>>>>,
249252
meshes_query: Extract<
250253
Query<(
251254
Entity,
@@ -259,50 +262,58 @@ pub fn extract_meshes(
259262
)>,
260263
>,
261264
) {
265+
meshes_query.par_iter().for_each(
266+
|(
267+
entity,
268+
view_visibility,
269+
transform,
270+
previous_transform,
271+
handle,
272+
not_receiver,
273+
not_caster,
274+
no_automatic_batching,
275+
)| {
276+
if !view_visibility.get() {
277+
return;
278+
}
279+
let transform = transform.affine();
280+
let previous_transform = previous_transform.map(|t| t.0).unwrap_or(transform);
281+
let mut flags = if not_receiver.is_some() {
282+
MeshFlags::empty()
283+
} else {
284+
MeshFlags::SHADOW_RECEIVER
285+
};
286+
if transform.matrix3.determinant().is_sign_positive() {
287+
flags |= MeshFlags::SIGN_DETERMINANT_MODEL_3X3;
288+
}
289+
let transforms = MeshTransforms {
290+
transform: (&transform).into(),
291+
previous_transform: (&previous_transform).into(),
292+
flags: flags.bits(),
293+
};
294+
let tls = thread_local_queues.get_or_default();
295+
let mut queue = tls.take();
296+
queue.push((
297+
entity,
298+
RenderMeshInstance {
299+
mesh_asset_id: handle.id(),
300+
transforms,
301+
shadow_caster: not_caster.is_none(),
302+
material_bind_group_id: MaterialBindGroupId::default(),
303+
automatic_batching: !no_automatic_batching,
304+
},
305+
));
306+
tls.set(queue);
307+
},
308+
);
309+
262310
render_mesh_instances.clear();
263311
let mut entities = Vec::with_capacity(*previous_len);
264-
265-
let visible_meshes = meshes_query.iter().filter(|(_, vis, ..)| vis.get());
266-
267-
for (
268-
entity,
269-
_,
270-
transform,
271-
previous_transform,
272-
handle,
273-
not_receiver,
274-
not_caster,
275-
no_automatic_batching,
276-
) in visible_meshes
277-
{
278-
let transform = transform.affine();
279-
let previous_transform = previous_transform.map(|t| t.0).unwrap_or(transform);
280-
let mut flags = if not_receiver.is_some() {
281-
MeshFlags::empty()
282-
} else {
283-
MeshFlags::SHADOW_RECEIVER
284-
};
285-
if transform.matrix3.determinant().is_sign_positive() {
286-
flags |= MeshFlags::SIGN_DETERMINANT_MODEL_3X3;
287-
}
288-
let transforms = MeshTransforms {
289-
transform: (&transform).into(),
290-
previous_transform: (&previous_transform).into(),
291-
flags: flags.bits(),
292-
};
312+
for queue in thread_local_queues.iter_mut() {
293313
// FIXME: Remove this - it is just a workaround to enable rendering to work as
294314
// render commands require an entity to exist at the moment.
295-
entities.push((entity, Mesh3d));
296-
render_mesh_instances.insert(
297-
entity,
298-
RenderMeshInstance {
299-
mesh_asset_id: handle.id(),
300-
transforms,
301-
shadow_caster: not_caster.is_none(),
302-
material_bind_group_id: MaterialBindGroupId::default(),
303-
automatic_batching: !no_automatic_batching,
304-
},
305-
);
315+
entities.extend(queue.get_mut().iter().map(|(e, _)| (*e, Mesh3d)));
316+
render_mesh_instances.extend(queue.get_mut().drain(..));
306317
}
307318
*previous_len = entities.len();
308319
commands.insert_or_spawn_batch(entities);

0 commit comments

Comments
 (0)