Skip to content

Commit 1ad50b6

Browse files
authored
metal: Create a global residency set, holding all allocated heaps (#273)
`gpu-allocator` creates heaps on which callers can allocate ranges and create "placed" resources like textures, buffers and acceleration structures. These individual resources, or the heaps as a whole, need to be made resident on the command buffer or even globally on an entire queue. In the previous API those heaps had to be made resident on individual command *encoders* with `useHeap(s):` (making an entire heap resident perfectly matches a bindless design, as opposed to making every individual resource -either placed on the heap or allocated separately- resident with `useResource(s):`). Worse, this API only applies `MTLResourceUsageRead` (exluding `RenderTarget` and `ShaderWrite` textures) which would disallow any resources on the heap to be written. Now with `MTLResidencySet` multiple heaps can be made resident with one call, defeating the performance overhead of individually "using" all heaps on *every* command *encoder*. But without tracking this inside `gpu-allocator`, users of our crate still have to manually rebuild this `MTLResidencySet` each time they change their allocations, without knowing when `gpu-allocator` created or destroyed a heap. By managing a single updated `MTLResidencySet` in `gpu-allocator`, callers can simply call `.commit()` on this object right before they submit command buffers referencing resources on these heaps, as long as they have the residency set attached to the queue in question or "used" on the command buffer that is being submitted. This removes all the performance overhead of repeatedly creating `MTLResidencySet`s, which otherwise defeats the purpose of it over plain `useHeap(s):` call(s).
1 parent 0c5fc95 commit 1ad50b6

File tree

8 files changed

+117
-64
lines changed

8 files changed

+117
-64
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ objc2-metal = { version = "0.3", default-features = false, features = [
4242
"MTLBuffer",
4343
"MTLDevice",
4444
"MTLHeap",
45+
"MTLResidencySet",
4546
"MTLResource",
4647
"MTLTexture",
4748
"std",

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ let mut allocator = Allocator::new(&AllocatorCreateDesc {
131131
device: device.clone(),
132132
debug_settings: Default::default(),
133133
allocation_sizes: Default::default(),
134+
create_residency_set: false,
134135
});
135136
```
136137

examples/metal-buffer.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ fn main() {
2121
device: device.clone(),
2222
debug_settings: Default::default(),
2323
allocation_sizes: Default::default(),
24+
create_residency_set: false,
2425
})
2526
.unwrap();
2627

src/allocator/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,8 @@ pub(crate) trait SubAllocator: SubAllocatorBase + fmt::Debug + Sync + Send {
133133

134134
fn report_allocations(&self) -> Vec<AllocationReport>;
135135

136+
/// Returns [`true`] if this allocator allows sub-allocating multiple allocations, [`false`] if
137+
/// it is designed to only represent dedicated allocations.
136138
#[must_use]
137139
fn supports_general_allocations(&self) -> bool;
138140
#[must_use]

src/d3d12/mod.rs

Lines changed: 15 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -475,28 +475,22 @@ impl MemoryType {
475475

476476
mem_block.sub_allocator.free(allocation.chunk_id)?;
477477

478-
if mem_block.sub_allocator.is_empty() {
479-
if mem_block.sub_allocator.supports_general_allocations() {
480-
if self.active_general_blocks > 1 {
481-
let block = self.memory_blocks[block_idx].take();
482-
if block.is_none() {
483-
return Err(AllocationError::Internal(
484-
"Memory block must be Some.".into(),
485-
));
486-
}
487-
// Note that `block` will be destroyed on `drop` here
488-
489-
self.active_general_blocks -= 1;
490-
}
491-
} else {
492-
let block = self.memory_blocks[block_idx].take();
493-
if block.is_none() {
494-
return Err(AllocationError::Internal(
495-
"Memory block must be Some.".into(),
496-
));
497-
}
498-
// Note that `block` will be destroyed on `drop` here
478+
// We only want to destroy this now-empty block if it is either a dedicated/personal
479+
// allocation, or a block supporting sub-allocations that is not the last one (ensuring
480+
// there's always at least one block/allocator readily available).
481+
let is_dedicated_or_not_last_general_block =
482+
!mem_block.sub_allocator.supports_general_allocations()
483+
|| self.active_general_blocks > 1;
484+
if mem_block.sub_allocator.is_empty() && is_dedicated_or_not_last_general_block {
485+
let block = self.memory_blocks[block_idx]
486+
.take()
487+
.ok_or_else(|| AllocationError::Internal("Memory block must be Some.".into()))?;
488+
489+
if block.sub_allocator.supports_general_allocations() {
490+
self.active_general_blocks -= 1;
499491
}
492+
493+
// Note that `block` will be destroyed on `drop` here
500494
}
501495

502496
Ok(())

src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@
160160
//! device: device.clone(),
161161
//! debug_settings: Default::default(),
162162
//! allocation_sizes: Default::default(),
163+
//! create_residency_set: false,
163164
//! });
164165
//! # }
165166
//! # #[cfg(not(feature = "metal"))]
@@ -177,6 +178,7 @@
177178
//! # device: device.clone(),
178179
//! # debug_settings: Default::default(),
179180
//! # allocation_sizes: Default::default(),
181+
//! # create_residency_set: false,
180182
//! # })
181183
//! # .unwrap();
182184
//! let allocation_desc = AllocationCreateDesc::buffer(

src/metal/mod.rs

Lines changed: 80 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@ use std::{backtrace::Backtrace, sync::Arc};
22

33
use log::debug;
44
use objc2::{rc::Retained, runtime::ProtocolObject};
5-
use objc2_foundation::NSString;
5+
use objc2_foundation::{ns_string, NSString};
6+
#[cfg(doc)]
7+
use objc2_metal::{MTLAllocation, MTLResource};
68
use objc2_metal::{
7-
MTLCPUCacheMode, MTLDevice, MTLHeap, MTLHeapDescriptor, MTLHeapType, MTLResourceOptions,
8-
MTLStorageMode, MTLTextureDescriptor,
9+
MTLCPUCacheMode, MTLDevice, MTLHeap, MTLHeapDescriptor, MTLHeapType, MTLResidencySet,
10+
MTLResourceOptions, MTLStorageMode, MTLTextureDescriptor,
911
};
1012

1113
#[cfg(feature = "visualizer")]
@@ -150,6 +152,7 @@ impl<'a> AllocationCreateDesc<'a> {
150152

151153
pub struct Allocator {
152154
device: Retained<ProtocolObject<dyn MTLDevice>>,
155+
global_residency_set: Option<Retained<ProtocolObject<dyn MTLResidencySet>>>,
153156
debug_settings: AllocatorDebugSettings,
154157
memory_types: Vec<MemoryType>,
155158
allocation_sizes: AllocationSizes,
@@ -166,6 +169,9 @@ pub struct AllocatorCreateDesc {
166169
pub device: Retained<ProtocolObject<dyn MTLDevice>>,
167170
pub debug_settings: AllocatorDebugSettings,
168171
pub allocation_sizes: AllocationSizes,
172+
/// Whether to create a [`MTLResidencySet`] containing all live heaps, that can be retrieved via
173+
/// [`Allocator::residency_set()`]. Only supported on `MacOS 15.0+` / `iOS 18.0+`.
174+
pub create_residency_set: bool,
169175
}
170176

171177
#[derive(Debug)]
@@ -215,6 +221,7 @@ impl MemoryBlock {
215221

216222
#[derive(Debug)]
217223
struct MemoryType {
224+
global_residency_set: Option<Retained<ProtocolObject<dyn MTLResidencySet>>>,
218225
memory_blocks: Vec<Option<MemoryBlock>>,
219226
_committed_allocations: CommittedAllocationStatistics,
220227
memory_location: MemoryLocation,
@@ -249,6 +256,10 @@ impl MemoryType {
249256
self.memory_location,
250257
)?;
251258

259+
if let Some(rs) = &self.global_residency_set {
260+
unsafe { rs.addAllocation(mem_block.heap.as_ref()) }
261+
}
262+
252263
let block_index = self.memory_blocks.iter().position(|block| block.is_none());
253264
let block_index = match block_index {
254265
Some(i) => {
@@ -317,19 +328,23 @@ impl MemoryType {
317328
}
318329
}
319330

320-
let new_memory_block = MemoryBlock::new(
331+
let mem_block = MemoryBlock::new(
321332
device,
322333
memblock_size,
323334
&self.heap_properties,
324335
false,
325336
self.memory_location,
326337
)?;
327338

339+
if let Some(rs) = &self.global_residency_set {
340+
unsafe { rs.addAllocation(mem_block.heap.as_ref()) }
341+
}
342+
328343
let new_block_index = if let Some(block_index) = empty_block_index {
329-
self.memory_blocks[block_index] = Some(new_memory_block);
344+
self.memory_blocks[block_index] = Some(mem_block);
330345
block_index
331346
} else {
332-
self.memory_blocks.push(Some(new_memory_block));
347+
self.memory_blocks.push(Some(mem_block));
333348
self.memory_blocks.len() - 1
334349
};
335350

@@ -373,28 +388,26 @@ impl MemoryType {
373388

374389
mem_block.sub_allocator.free(allocation.chunk_id)?;
375390

376-
if mem_block.sub_allocator.is_empty() {
377-
if mem_block.sub_allocator.supports_general_allocations() {
378-
if self.active_general_blocks > 1 {
379-
let block = self.memory_blocks[block_idx].take();
380-
if block.is_none() {
381-
return Err(AllocationError::Internal(
382-
"Memory block must be Some.".into(),
383-
));
384-
}
385-
// Note that `block` will be destroyed on `drop` here
391+
// We only want to destroy this now-empty block if it is either a dedicated/personal
392+
// allocation, or a block supporting sub-allocations that is not the last one (ensuring
393+
// there's always at least one block/allocator readily available).
394+
let is_dedicated_or_not_last_general_block =
395+
!mem_block.sub_allocator.supports_general_allocations()
396+
|| self.active_general_blocks > 1;
397+
if mem_block.sub_allocator.is_empty() && is_dedicated_or_not_last_general_block {
398+
let block = self.memory_blocks[block_idx]
399+
.take()
400+
.ok_or_else(|| AllocationError::Internal("Memory block must be Some.".into()))?;
401+
402+
if block.sub_allocator.supports_general_allocations() {
403+
self.active_general_blocks -= 1;
404+
}
386405

387-
self.active_general_blocks -= 1;
388-
}
389-
} else {
390-
let block = self.memory_blocks[block_idx].take();
391-
if block.is_none() {
392-
return Err(AllocationError::Internal(
393-
"Memory block must be Some.".into(),
394-
));
395-
}
396-
// Note that `block` will be destroyed on `drop` here
406+
if let Some(rs) = &self.global_residency_set {
407+
unsafe { rs.removeAllocation(block.heap.as_ref()) }
397408
}
409+
410+
// Note that `block` will be destroyed on `drop` here
398411
}
399412

400413
Ok(())
@@ -427,10 +440,23 @@ impl Allocator {
427440
}),
428441
];
429442

443+
let global_residency_set = if desc.create_residency_set {
444+
Some(unsafe {
445+
let rs_desc = objc2_metal::MTLResidencySetDescriptor::new();
446+
rs_desc.setLabel(Some(ns_string!("gpu-allocator global residency set")));
447+
desc.device
448+
.newResidencySetWithDescriptor_error(&rs_desc)
449+
.expect("Failed to create MTLResidencySet. Unsupported MacOS/iOS version?")
450+
})
451+
} else {
452+
None
453+
};
454+
430455
let memory_types = heap_types
431456
.into_iter()
432457
.enumerate()
433458
.map(|(i, (memory_location, heap_descriptor))| MemoryType {
459+
global_residency_set: global_residency_set.clone(),
434460
memory_blocks: vec![],
435461
_committed_allocations: CommittedAllocationStatistics {
436462
num_allocations: 0,
@@ -448,6 +474,7 @@ impl Allocator {
448474
debug_settings: desc.debug_settings,
449475
memory_types,
450476
allocation_sizes: desc.allocation_sizes,
477+
global_residency_set,
451478
})
452479
}
453480

@@ -557,4 +584,31 @@ impl Allocator {
557584

558585
total_capacity_bytes
559586
}
587+
588+
/// Optional residency set containing all heap allocations created/owned by this allocator to
589+
/// be made resident at once when its allocations are used on the GPU. The caller _must_ invoke
590+
/// [`MTLResidencySet::commit()`] whenever these resources are used to make sure the latest
591+
/// changes are visible to Metal, e.g. before committing a command buffer.
592+
///
593+
/// This residency set can be attached to individual command buffers or to a queue directly
594+
/// since usage of allocated resources is expected to be global.
595+
///
596+
/// Alternatively callers can build up their own residency set(s) based on individual
597+
/// [`MTLAllocation`]s [^heap-allocation] rather than making all heaps allocated via
598+
/// `gpu-allocator` resident at once.
599+
///
600+
/// [^heap-allocation]: Note that [`MTLHeap`]s returned by [`Allocator::heaps()`] are also
601+
/// allocations. If individual placed [`MTLResource`]s on a heap are made resident, the entire
602+
/// heap will be made resident.
603+
///
604+
/// Callers still need to be careful to make resources created outside of `gpu-allocator`
605+
/// resident on the GPU, such as indirect command buffers.
606+
///
607+
/// This residency set is only available when requested via
608+
/// [`AllocatorCreateDesc::create_residency_set`], otherwise this function returns [`None`].
609+
pub fn residency_set(&self) -> Option<&Retained<ProtocolObject<dyn MTLResidencySet>>> {
610+
// Return the retained object so that the caller also has a way to store it, since we will
611+
// keep using and updating the same object going forward.
612+
self.global_residency_set.as_ref()
613+
}
560614
}

src/vulkan/mod.rs

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -657,24 +657,22 @@ impl MemoryType {
657657

658658
mem_block.sub_allocator.free(allocation.chunk_id)?;
659659

660-
if mem_block.sub_allocator.is_empty() {
661-
if mem_block.sub_allocator.supports_general_allocations() {
662-
if self.active_general_blocks > 1 {
663-
let block = self.memory_blocks[block_idx].take();
664-
let block = block.ok_or_else(|| {
665-
AllocationError::Internal("Memory block must be Some.".into())
666-
})?;
667-
block.destroy(device);
668-
669-
self.active_general_blocks -= 1;
670-
}
671-
} else {
672-
let block = self.memory_blocks[block_idx].take();
673-
let block = block.ok_or_else(|| {
674-
AllocationError::Internal("Memory block must be Some.".into())
675-
})?;
676-
block.destroy(device);
660+
// We only want to destroy this now-empty block if it is either a dedicated/personal
661+
// allocation, or a block supporting sub-allocations that is not the last one (ensuring
662+
// there's always at least one block/allocator readily available).
663+
let is_dedicated_or_not_last_general_block =
664+
!mem_block.sub_allocator.supports_general_allocations()
665+
|| self.active_general_blocks > 1;
666+
if mem_block.sub_allocator.is_empty() && is_dedicated_or_not_last_general_block {
667+
let block = self.memory_blocks[block_idx]
668+
.take()
669+
.ok_or_else(|| AllocationError::Internal("Memory block must be Some.".into()))?;
670+
671+
if block.sub_allocator.supports_general_allocations() {
672+
self.active_general_blocks -= 1;
677673
}
674+
675+
block.destroy(device);
678676
}
679677

680678
Ok(())

0 commit comments

Comments
 (0)