Skip to content

Commit bf93064

Browse files
committed
metal: Create a global residency set, holding all allocated heaps
`gpu-allocator` creates heaps on which callers can allocate ranges and create "placed" resources like textures, buffers and acceleration structures. These individual resources, or the heaps as a whole, need to be made resident on the command buffer or even globally on an entire queue. In the previous API those heaps had to be made resident on individual command *encoders* with `useHeap(s):` (making an entire heap resident perfectly matches a bindless design, as opposed to making every individual resource -either placed on the heap or allocated separately- resident with `useResource(s):`). Worse, this API only applies `MTLResourceUsageRead` (exluding `RenderTarget` and `ShaderWrite` textures) which would disallow any resources on the heap to be written. Now with `MTLResidencySet` multiple heaps can be made resident with one call, defeating the performance overhead of individually "using" all heaps on *every* command *encoder*. But without tracking this inside `gpu-allocator`, users of our crate still have to manually rebuild this `MTLResidencySet` each time they change their allocations, without knowing when `gpu-allocator` created or destroyed a heap. By managing a single updated `MTLResidencySet` in `gpu-allocator`, callers can simply call `.commit()` on this object right before they submit command buffers referencing resources on these heaps, as long as they have the residency set attached to the queue in question or "used" on the command buffer that is being submitted. This removes all the performance overhead of repeatedly creating `MTLResidencySet`s, which otherwise defeats the purpose of it over plain `useHeap(s):` call(s).
1 parent 0c5fc95 commit bf93064

File tree

2 files changed

+74
-26
lines changed

2 files changed

+74
-26
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ objc2-metal = { version = "0.3", default-features = false, features = [
4242
"MTLBuffer",
4343
"MTLDevice",
4444
"MTLHeap",
45+
"MTLResidencySet",
4546
"MTLResource",
4647
"MTLTexture",
4748
"std",

src/metal/mod.rs

Lines changed: 73 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@ use std::{backtrace::Backtrace, sync::Arc};
22

33
use log::debug;
44
use objc2::{rc::Retained, runtime::ProtocolObject};
5-
use objc2_foundation::NSString;
5+
use objc2_foundation::{ns_string, NSString};
66
use objc2_metal::{
7-
MTLCPUCacheMode, MTLDevice, MTLHeap, MTLHeapDescriptor, MTLHeapType, MTLResourceOptions,
8-
MTLStorageMode, MTLTextureDescriptor,
7+
MTLCPUCacheMode, MTLDevice, MTLHeap, MTLHeapDescriptor, MTLHeapType, MTLResidencySet,
8+
MTLResourceOptions, MTLStorageMode, MTLTextureDescriptor,
99
};
1010

1111
#[cfg(feature = "visualizer")]
@@ -150,6 +150,7 @@ impl<'a> AllocationCreateDesc<'a> {
150150

151151
pub struct Allocator {
152152
device: Retained<ProtocolObject<dyn MTLDevice>>,
153+
global_residency_set: Option<Retained<ProtocolObject<dyn MTLResidencySet>>>,
153154
debug_settings: AllocatorDebugSettings,
154155
memory_types: Vec<MemoryType>,
155156
allocation_sizes: AllocationSizes,
@@ -166,6 +167,9 @@ pub struct AllocatorCreateDesc {
166167
pub device: Retained<ProtocolObject<dyn MTLDevice>>,
167168
pub debug_settings: AllocatorDebugSettings,
168169
pub allocation_sizes: AllocationSizes,
170+
/// Whether to create a [`MTLResidencySet`] containing all live heaps, that can be retrieved via
171+
/// [`Allocator::residency_set()`]. Only supported on MacOS 15.0+ / iOS 18.0+.
172+
pub create_residency_set: bool,
169173
}
170174

171175
#[derive(Debug)]
@@ -215,6 +219,7 @@ impl MemoryBlock {
215219

216220
#[derive(Debug)]
217221
struct MemoryType {
222+
global_residency_set: Option<Retained<ProtocolObject<dyn MTLResidencySet>>>,
218223
memory_blocks: Vec<Option<MemoryBlock>>,
219224
_committed_allocations: CommittedAllocationStatistics,
220225
memory_location: MemoryLocation,
@@ -249,6 +254,10 @@ impl MemoryType {
249254
self.memory_location,
250255
)?;
251256

257+
if let Some(rs) = &self.global_residency_set {
258+
unsafe { rs.addAllocation(mem_block.heap.as_ref()) }
259+
}
260+
252261
let block_index = self.memory_blocks.iter().position(|block| block.is_none());
253262
let block_index = match block_index {
254263
Some(i) => {
@@ -317,19 +326,23 @@ impl MemoryType {
317326
}
318327
}
319328

320-
let new_memory_block = MemoryBlock::new(
329+
let mem_block = MemoryBlock::new(
321330
device,
322331
memblock_size,
323332
&self.heap_properties,
324333
false,
325334
self.memory_location,
326335
)?;
327336

337+
if let Some(rs) = &self.global_residency_set {
338+
unsafe { rs.addAllocation(mem_block.heap.as_ref()) }
339+
}
340+
328341
let new_block_index = if let Some(block_index) = empty_block_index {
329-
self.memory_blocks[block_index] = Some(new_memory_block);
342+
self.memory_blocks[block_index] = Some(mem_block);
330343
block_index
331344
} else {
332-
self.memory_blocks.push(Some(new_memory_block));
345+
self.memory_blocks.push(Some(mem_block));
333346
self.memory_blocks.len() - 1
334347
};
335348

@@ -373,28 +386,25 @@ impl MemoryType {
373386

374387
mem_block.sub_allocator.free(allocation.chunk_id)?;
375388

376-
if mem_block.sub_allocator.is_empty() {
377-
if mem_block.sub_allocator.supports_general_allocations() {
378-
if self.active_general_blocks > 1 {
379-
let block = self.memory_blocks[block_idx].take();
380-
if block.is_none() {
381-
return Err(AllocationError::Internal(
382-
"Memory block must be Some.".into(),
383-
));
384-
}
385-
// Note that `block` will be destroyed on `drop` here
389+
if mem_block.sub_allocator.is_empty()
390+
&& (!mem_block.sub_allocator.supports_general_allocations()
391+
|| self.active_general_blocks > 1)
392+
{
393+
let Some(block) = self.memory_blocks[block_idx].take() else {
394+
return Err(AllocationError::Internal(
395+
"Memory block must be Some.".into(),
396+
));
397+
};
386398

387-
self.active_general_blocks -= 1;
388-
}
389-
} else {
390-
let block = self.memory_blocks[block_idx].take();
391-
if block.is_none() {
392-
return Err(AllocationError::Internal(
393-
"Memory block must be Some.".into(),
394-
));
395-
}
396-
// Note that `block` will be destroyed on `drop` here
399+
if block.sub_allocator.supports_general_allocations() {
400+
self.active_general_blocks -= 1;
401+
}
402+
403+
if let Some(rs) = &self.global_residency_set {
404+
unsafe { rs.removeAllocation(block.heap.as_ref()) }
397405
}
406+
407+
// Note that `block` will be destroyed on `drop` here
398408
}
399409

400410
Ok(())
@@ -427,10 +437,19 @@ impl Allocator {
427437
}),
428438
];
429439

440+
let global_residency_set = desc.create_residency_set.then(|| unsafe {
441+
let rs_desc = objc2_metal::MTLResidencySetDescriptor::new();
442+
rs_desc.setLabel(Some(ns_string!("gpu-allocator global residency set")));
443+
desc.device
444+
.newResidencySetWithDescriptor_error(&rs_desc)
445+
.expect("Failed to create MTLResidencySet. Too low MacOS/iOS version?")
446+
});
447+
430448
let memory_types = heap_types
431449
.into_iter()
432450
.enumerate()
433451
.map(|(i, (memory_location, heap_descriptor))| MemoryType {
452+
global_residency_set: global_residency_set.clone(),
434453
memory_blocks: vec![],
435454
_committed_allocations: CommittedAllocationStatistics {
436455
num_allocations: 0,
@@ -448,6 +467,7 @@ impl Allocator {
448467
debug_settings: desc.debug_settings,
449468
memory_types,
450469
allocation_sizes: desc.allocation_sizes,
470+
global_residency_set,
451471
})
452472
}
453473

@@ -557,4 +577,31 @@ impl Allocator {
557577

558578
total_capacity_bytes
559579
}
580+
581+
/// Optional residency set containing all heap allocations created/owned by this allocator to
582+
/// be made resident at once when its allocations are used on the GPU. The caller _must_ invoke
583+
/// [`MTLResidencySet::commit()`] whenever these resources are used to make sure the latest
584+
/// changes are visible to Metal, e.g. before committing a command buffer.
585+
///
586+
/// This residency set can be attached to individual command buffers or to a queue directly
587+
/// since usage of allocated resources is expected to be global.
588+
///
589+
/// Alternatively callers can build up their own residency set(s) based on individual
590+
/// [`objc2_metal::MTLAllocation`]s [^heap-allocation] rather than making all heaps allocated
591+
/// via `gpu-allocator` resident at once.
592+
///
593+
/// [^heap-allocation]: Note that [`MTLHeap`]s returned by [`Allocator::heaps()`] are also
594+
/// allocations. If individual placed [`MTLResource`]s on a heap are made resident, the entire
595+
/// heap will be made resident.
596+
///
597+
/// Callers still need to be careful to make resources created outside of `gpu-allocator`
598+
/// resident on the GPU, such as indirect command buffers.
599+
///
600+
/// This residency set is only available when requested via
601+
/// [`AllocatorCreateDesc::create_residency_set`], otherwise this function returns [`None`].
602+
pub fn residency_set(&self) -> Option<&Retained<ProtocolObject<dyn MTLResidencySet>>> {
603+
// Return the retained object so that the caller also has a way to store it,
604+
// since we
605+
self.global_residency_set.as_ref()
606+
}
560607
}

0 commit comments

Comments
 (0)