Skip to content

Commit 4cc92a0

Browse files
Amortize the cost of freeing entities (#22658)
# Objective The biggest drawback of #18670 was that it made freeing `Entity`'s back to the allocator 4x slower. That meant a 20% regression in despawn performance. This PR vastly improves the performance of the entity allocator for freeing entities. ## Solution Add a local free list in pace in the main entity allocator. This is an `ArrayVec` called `quick_free`. When an entity is freed, add it to the `quick_free`. If it is full, flush the array to the full shared allocator. Currently the array has length 64, taking 512 bytes. Since this is directly included in the already massive `World` type, I don't think this is an issue, and I would guess boxing it would hurt performance here. It also means that there will be at most 64 freed entities that simply can't be allocated. This reduces the worst case maximum entity count from 4,294,967,296 to 4,294,967,232 (big deal). This also adds a new `free_many` function that is very fast compared to doing them one by one. ## Testing - CI and benches. --- ## Showcase Here are some rough benchmarks on my M2 MAX: ```txt group post_quick_free_list pre_quick_free_list pre_remote_reservation ----- -------------------- ------------------- ---------------------- entity_allocator_free/10000_entities 1.00 29.7±0.48µs ? ?/sec 1.31 38.9±0.97µs ? ?/sec 1.00 29.8±0.85µs ? ?/sec entity_allocator_free/100_entities 1.00 393.3±26.21ns ? ?/sec 1.35 531.8±26.34ns ? ?/sec 1.14 446.7±11.32ns ? ?/sec entity_allocator_free/1_entities 1.00 4.6±2.17ns ? ?/sec 42.27 195.3±32.49ns ? ?/sec 4.25 19.6±8.67ns ? ?/sec entity_allocator_free_bulk/10000_entities 1.00 8.7±0.36µs ? ?/sec entity_allocator_free_bulk/100_entities 1.00 240.9±31.01ns ? ?/sec entity_allocator_free_bulk/1_entities 1.00 206.8±39.95ns ? ?/sec ``` Looking at the cost of freeing 1,000 entities, this makes the new allocator exactly as fast as the pre-#18670 one, 30% faster than main. The new `free_many` takes 8.7µs to free 1,000 entities where the optimized `free` takes `29.7`, so another big win there. This should make up the 20% regression to despawning. It might be even faster than pre-#18670 if we increase 64 to 128 or something, but I think that's unnecessary. This could also much improve performance for despawning scenes if we can find a way to make use of `free_many`, but that's a different task.
1 parent cc3fa01 commit 4cc92a0

File tree

7 files changed

+97
-38
lines changed

7 files changed

+97
-38
lines changed

benches/benches/bevy_ecs/main.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,9 +96,7 @@ mod world_builder {
9696

9797
// free
9898
entities.shuffle(&mut self.rng);
99-
entities
100-
.drain(..)
101-
.for_each(|e| self.world.entity_allocator_mut().free(e));
99+
self.world.entity_allocator_mut().free_many(&entities);
102100

103101
self
104102
}

benches/benches/bevy_ecs/world/entity_allocator.rs

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,29 @@ pub fn entity_allocator_benches(criterion: &mut Criterion) {
7272

7373
group.finish();
7474

75+
let mut group = criterion.benchmark_group("entity_allocator_free_bulk");
76+
group.warm_up_time(core::time::Duration::from_millis(500));
77+
group.measurement_time(core::time::Duration::from_secs(4));
78+
79+
for entity_count in ENTITY_COUNTS {
80+
group.bench_function(format!("{entity_count}_entities"), |bencher| {
81+
bencher.iter_batched_ref(
82+
|| {
83+
let world = World::new();
84+
let entities =
85+
Vec::from_iter(world.entity_allocator().alloc_many(entity_count));
86+
(world, entities)
87+
},
88+
|(world, entities)| {
89+
world.entity_allocator_mut().free_many(entities);
90+
},
91+
BatchSize::SmallInput,
92+
);
93+
});
94+
}
95+
96+
group.finish();
97+
7598
let mut group = criterion.benchmark_group("entity_allocator_allocate_reused");
7699
group.warm_up_time(core::time::Duration::from_millis(500));
77100
group.measurement_time(core::time::Duration::from_secs(4));
@@ -81,11 +104,9 @@ pub fn entity_allocator_benches(criterion: &mut Criterion) {
81104
bencher.iter_batched_ref(
82105
|| {
83106
let mut world = World::new();
84-
let mut entities =
107+
let entities =
85108
Vec::from_iter(world.entity_allocator().alloc_many(entity_count));
86-
entities
87-
.drain(..)
88-
.for_each(|e| world.entity_allocator_mut().free(e));
109+
world.entity_allocator_mut().free_many(&entities);
89110
world
90111
},
91112
|world| {
@@ -110,11 +131,9 @@ pub fn entity_allocator_benches(criterion: &mut Criterion) {
110131
bencher.iter_batched_ref(
111132
|| {
112133
let mut world = World::new();
113-
let mut entities =
134+
let entities =
114135
Vec::from_iter(world.entity_allocator().alloc_many(entity_count));
115-
entities
116-
.drain(..)
117-
.for_each(|e| world.entity_allocator_mut().free(e));
136+
world.entity_allocator_mut().free_many(&entities);
118137
world
119138
},
120139
|world| {

crates/bevy_ecs/Cargo.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ default = ["std", "bevy_reflect", "async_executor", "backtrace"]
1717

1818
## Enables multithreading support. Schedules will attempt to run systems on
1919
## multiple threads whenever possible.
20-
multi_threaded = ["bevy_tasks/multi_threaded", "dep:arrayvec"]
20+
multi_threaded = ["bevy_tasks/multi_threaded"]
2121

2222
## Adds serialization support through `serde`.
2323
serialize = ["dep:serde", "bevy_platform/serialize", "indexmap/serde"]
@@ -73,7 +73,7 @@ std = [
7373
"indexmap/std",
7474
"serde?/std",
7575
"nonmax/std",
76-
"arrayvec?/std",
76+
"arrayvec/std",
7777
"log/std",
7878
"bevy_platform/std",
7979
]
@@ -114,7 +114,7 @@ derive_more = { version = "2", default-features = false, features = [
114114
"as_ref",
115115
] }
116116
nonmax = { version = "0.5", default-features = false }
117-
arrayvec = { version = "0.7.4", default-features = false, optional = true }
117+
arrayvec = { version = "0.7.4", default-features = false }
118118
smallvec = { version = "1", default-features = false, features = [
119119
"union",
120120
"const_generics",

crates/bevy_ecs/src/entity/map_entities.rs

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -404,13 +404,12 @@ mod tests {
404404
);
405405

406406
mapper.finish(&mut world);
407-
// Next allocated entity should be a further generation on the same index
408-
let entity = world.spawn_empty().id();
409-
assert_eq!(entity.index(), dead_ref.index());
410-
assert!(entity
407+
let freed_dead_ref = world.entities().resolve_from_index(dead_ref.index());
408+
assert!(freed_dead_ref
411409
.generation()
412410
.cmp_approx(&dead_ref.generation())
413411
.is_gt());
412+
assert!(world.entities().check_can_spawn_at(freed_dead_ref).is_ok());
414413
}
415414

416415
#[test]
@@ -422,12 +421,11 @@ mod tests {
422421
mapper.get_mapped(Entity::from_raw_u32(0).unwrap())
423422
});
424423

425-
// Next allocated entity should be a further generation on the same index
426-
let entity = world.spawn_empty().id();
427-
assert_eq!(entity.index(), dead_ref.index());
428-
assert!(entity
424+
let freed_dead_ref = world.entities().resolve_from_index(dead_ref.index());
425+
assert!(freed_dead_ref
429426
.generation()
430427
.cmp_approx(&dead_ref.generation())
431428
.is_gt());
429+
assert!(world.entities().check_can_spawn_at(freed_dead_ref).is_ok());
432430
}
433431
}

crates/bevy_ecs/src/entity/mod.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -733,6 +733,14 @@ impl EntityAllocator {
733733
self.inner.free(freed);
734734
}
735735

736+
/// This allows `freed` to be retrieved from [`alloc`](Self::alloc), etc.
737+
///
738+
/// The same caveats of [`free`](Self::free) apply here.
739+
/// (Eg. the slice should not contain duplicates.)
740+
pub fn free_many(&mut self, freed: &[Entity]) {
741+
self.inner.free_many(freed);
742+
}
743+
736744
/// Allocates some [`Entity`].
737745
/// The result could have come from a [`free`](Self::free) or be a brand new [`EntityIndex`].
738746
///

crates/bevy_ecs/src/entity/remote_allocator.rs

Lines changed: 49 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,10 @@
3030
//! These types are summed up in [`SharedAllocator`], which is highly unsafe.
3131
//! The interfaces [`Allocator`] and [`RemoteAllocator`] provide safe interfaces to them.
3232
33+
use arrayvec::ArrayVec;
3334
use bevy_platform::{
3435
cell::SyncUnsafeCell,
35-
prelude::Vec,
36+
prelude::{Box, Vec},
3637
sync::{
3738
atomic::{AtomicBool, AtomicPtr, AtomicU32, AtomicU64, Ordering},
3839
Arc,
@@ -571,31 +572,35 @@ impl FreeList {
571572
self.len.state(Ordering::Relaxed).length()
572573
}
573574

574-
/// Frees the `entity` allowing it to be reused.
575+
/// Frees the `entities` allowing them to be reused.
575576
///
576577
/// # Safety
577578
///
578579
/// There must be a clear, strict order between this call and calls to [`Self::free`], [`Self::alloc_many`], and [`Self::alloc`].
579580
/// Otherwise, the compiler will make unsound optimizations.
580581
#[inline]
581-
unsafe fn free(&self, entity: Entity) {
582+
unsafe fn free(&self, entities: &[Entity]) {
582583
// Disable remote allocation.
583584
// We don't need to acquire the most recent memory from remote threads because we never read it.
584585
// We do not need to release to remote threads because we only changed the disabled bit,
585586
// which the remote allocator would with relaxed ordering.
586587
let state = self.len.disable_len_for_state(Ordering::Relaxed);
587588

588-
// Push onto the buffer
589-
let len = state.length();
590-
// SAFETY: Caller ensures this does not conflict with `free` or `alloc` calls,
591-
// and we just disabled remote allocation with a strict memory ordering.
592-
// We only call `set` during a free, and the caller ensures that is not called concurrently.
593-
unsafe {
594-
self.buffer.set(len, entity);
595-
}
589+
// Append onto the buffer
590+
let mut len = state.length();
591+
// `for_each` is typically faster than `for` here.
592+
entities.iter().for_each(|&entity| {
593+
// SAFETY: Caller ensures this does not conflict with `free` or `alloc` calls,
594+
// and we just disabled remote allocation with a strict memory ordering.
595+
// We only call `set` during a free, and the caller ensures that is not called concurrently.
596+
unsafe {
597+
self.buffer.set(len, entity);
598+
}
599+
len += 1;
600+
});
596601

597602
// Update length
598-
let new_state = state.with_length(len + 1);
603+
let new_state = state.with_length(len);
599604
// This is safe because `alloc` is not being called and `remote_alloc` checks that it is not disabled.
600605
// We don't need to change the generation since this will change the length, which changes the value anyway.
601606
// If, from a `remote_alloc` perspective, this does not change the length (i.e. this changes it *back* to what it was),
@@ -881,7 +886,11 @@ impl SharedAllocator {
881886
/// If this were cloned, that assumption would be broken, leading to undefined behavior.
882887
/// This is in contrast to the [`RemoteAllocator`], which may be cloned freely.
883888
pub(super) struct Allocator {
889+
/// The shared allocator state, which we share with any [`RemoteAllocator`]s.
884890
shared: Arc<SharedAllocator>,
891+
/// The local free list.
892+
/// We use this to amortize the cost of freeing to the shared allocator since that is expensive.
893+
local_free: Box<ArrayVec<Entity, 128>>,
885894
}
886895

887896
impl Default for Allocator {
@@ -895,6 +904,7 @@ impl Allocator {
895904
pub(super) fn new() -> Self {
896905
Self {
897906
shared: Arc::new(SharedAllocator::new()),
907+
local_free: Box::new(ArrayVec::new()),
898908
}
899909
}
900910

@@ -918,12 +928,25 @@ impl Allocator {
918928
self.shared.free.num_free()
919929
}
920930

931+
/// Flushes the [`local_free`](Self::local_free) list to the shared allocator.
932+
#[inline]
933+
fn flush_freed(&mut self) {
934+
// SAFETY: We have `&mut self`.
935+
unsafe {
936+
self.shared.free.free(self.local_free.as_slice());
937+
}
938+
self.local_free.clear();
939+
}
940+
921941
/// Frees the entity allowing it to be reused.
922942
#[inline]
923943
pub(super) fn free(&mut self, entity: Entity) {
924-
// SAFETY: We have `&mut self`.
944+
if self.local_free.is_full() {
945+
self.flush_freed();
946+
}
947+
// SAFETY: The `ArrayVec` is not full or has just been cleared.
925948
unsafe {
926-
self.shared.free.free(entity);
949+
self.local_free.push_unchecked(entity);
927950
}
928951
}
929952

@@ -933,6 +956,17 @@ impl Allocator {
933956
// SAFETY: `free` takes `&mut self`, and this lifetime is captured by the iterator.
934957
unsafe { self.shared.alloc_many(count) }
935958
}
959+
960+
/// Frees the entities allowing them to be reused.
961+
#[inline]
962+
pub(super) fn free_many(&mut self, entities: &[Entity]) {
963+
if self.local_free.try_extend_from_slice(entities).is_err() {
964+
// SAFETY: We have `&mut self`.
965+
unsafe {
966+
self.shared.free.free(entities);
967+
}
968+
}
969+
}
936970
}
937971

938972
impl Drop for Allocator {
@@ -1132,6 +1166,7 @@ mod tests {
11321166
allocator.free(e1);
11331167
allocator.free(e2);
11341168
allocator.free(e3);
1169+
allocator.flush_freed();
11351170

11361171
let r0 = allocator.alloc();
11371172
let mut many = allocator.alloc_many(2);

crates/bevy_ecs/src/world/mod.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4402,7 +4402,7 @@ mod tests {
44024402
world.entities.entity_get_spawn_or_despawn_tick(entity),
44034403
Some(world.change_tick())
44044404
);
4405-
world.despawn(entity);
4405+
let new = world.despawn_no_free(entity).unwrap();
44064406
assert_eq!(
44074407
world.entities.entity_get_spawned_or_despawned_by(entity),
44084408
MaybeLocation::new(Some(Location::caller()))
@@ -4411,7 +4411,8 @@ mod tests {
44114411
world.entities.entity_get_spawn_or_despawn_tick(entity),
44124412
Some(world.change_tick())
44134413
);
4414-
let new = world.spawn_empty().id();
4414+
4415+
world.spawn_empty_at(new).unwrap();
44154416
assert_eq!(entity.index(), new.index());
44164417
assert_eq!(
44174418
world.entities.entity_get_spawned_or_despawned_by(entity),

0 commit comments

Comments
 (0)