Skip to content

Commit db08cc0

Browse files
authored
Add command buffer pool for improved multi-threaded Metal performance (#3175)
* add initial pool implementation
* update implementation to fix breaking tests
* add pool based tests to main test suite
* fix ordering types to avoid race conditions
* add in flight processing
* improve error handling and add wait flush test
* ensure flush state is returned from entry
* rename vars for clarity
* address pr comments
* update error mapping
* update to select entry with max compute count
* update tests and set default pool size
1 parent 87653ca commit db08cc0

File tree

3 files changed

+307
-136
lines changed

3 files changed

+307
-136
lines changed

candle-metal-kernels/examples/metal_benchmarks.rs

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,12 @@
11
use anyhow::Result;
22
use candle_metal_kernels::{
3-
metal::{create_command_buffer, Device},
3+
metal::{create_command_buffer, CommandSemaphore, Device},
44
GemmDType, RESOURCE_OPTIONS,
55
};
66
/// This example contains some simple benchmarks so that it's easy to run them in perf etc.
77
use clap::{Parser, Subcommand};
88
use half::f16;
9+
use std::sync::Arc;
910

1011
fn run_gemm(f32: bool, n: usize) -> Result<()> {
1112
const WARMUP_ITERS: usize = 2;
@@ -65,7 +66,8 @@ fn run_gemm(f32: bool, n: usize) -> Result<()> {
6566
let mut sum_dt = 0f64;
6667
let mut iters = 0usize;
6768
for idx in 0.. {
68-
let command_buffer = create_command_buffer(&command_queue).unwrap();
69+
let semaphore = Arc::new(CommandSemaphore::new());
70+
let command_buffer = create_command_buffer(&command_queue, semaphore).unwrap();
6971
let start_time = std::time::Instant::now();
7072
candle_metal_kernels::call_mlx_gemm(
7173
&device,

0 commit comments

Comments
 (0)