
Commit ce158a7

Merge branch 'main' into rhypot

2 parents 6ff5e4b + 9b08383

342 files changed: +5218 -3385 lines changed


Cargo.toml

Lines changed: 3 additions & 3 deletions
@@ -11,7 +11,7 @@ edition = "2024"
 license = "MIT OR Apache-2.0"
 readme = "README.md"
 rust-version = "1.88"
-version = "0.9.0-pre.2"
+version = "0.9.0-pre.3"
 
 [workspace.dependencies]
 bitflags = { version = "2.9.1", features = ["serde"] }
@@ -35,7 +35,7 @@ serde_json = { version = "1.0.119", default-features = false }
 toml = "0.9.1"
 variadics_please = "1"
 
-# no_std compatiblity
+# no_std compatibility
 dashmap = "6.1.0"
 foldhash = { version = "0.1.2", default-features = false }
 hashbrown = "0.15.5"
@@ -103,7 +103,7 @@ tracel-llvm = { version = "20.1.4-5", features = ["mlir-helpers"] }
 # tracel-llvm = { git = "https://github.com/tracel-ai/tracel-llvm.git", branch = "fix/linux", package = "tracel-llvm", features = ["mlir-helpers"] }
 # tracel-llvm = { path = "../tracel-llvm/crates/tracel-llvm", features = ["mlir-helpers"] }
 
-cudarc = { version = "0.17.7", features = [
+cudarc = { version = "0.18.1", features = [
     "std",
     "driver",
     "nvrtc",

crates/cubecl-attention/Cargo.toml

Lines changed: 16 additions & 7 deletions
@@ -15,16 +15,25 @@ default = ["std", "cubecl-runtime/default", "cubecl-core/default"]
 export_tests = ["pretty_assertions"]
 std = ["cubecl-runtime/std", "cubecl-core/std"]
 
-attention_tests = []
+attention_tests_f16 = []
+attention_tests_f32 = []
+attention_tests_unit = []
+attention_tests_blackbox_accelerated = []
+attention_tests_all = [
+    "attention_tests_f16",
+    "attention_tests_f32",
+    "attention_tests_unit",
+    "attention_tests_blackbox_accelerated",
+]
 
 [dependencies]
 bytemuck = { workspace = true }
-cubecl-common = { path = "../cubecl-common", version = "0.9.0-pre.2", default-features = false }
-cubecl-core = { path = "../cubecl-core", version = "0.9.0-pre.2", default-features = false }
-cubecl-runtime = { path = "../cubecl-runtime", version = "0.9.0-pre.2", default-features = false }
-cubecl-std = { path = "../cubecl-std", version = "0.9.0-pre.2", default-features = false }
-cubecl-matmul = { path = "../cubecl-matmul", version = "0.9.0-pre.2", default-features = false }
-cubecl-random = { path = "../cubecl-random", version = "0.9.0-pre.2", default-features = false }
+cubecl-common = { path = "../cubecl-common", version = "=0.9.0-pre.3", default-features = false }
+cubecl-core = { path = "../cubecl-core", version = "=0.9.0-pre.3", default-features = false }
+cubecl-runtime = { path = "../cubecl-runtime", version = "=0.9.0-pre.3", default-features = false }
+cubecl-std = { path = "../cubecl-std", version = "=0.9.0-pre.3", default-features = false }
+cubecl-matmul = { path = "../cubecl-matmul", version = "=0.9.0-pre.3", default-features = false }
+cubecl-random = { path = "../cubecl-random", version = "=0.9.0-pre.3", default-features = false }
 half = { workspace = true, features = ["bytemuck"] }
 pretty_assertions = { workspace = true, optional = true }
 serde = { workspace = true }
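
The single `attention_tests` feature is split into per-suite flags, with `attention_tests_all` as the umbrella, and the intra-workspace dependencies are now pinned exactly (`=0.9.0-pre.3`) so mixed pre-release versions cannot be resolved. A minimal sketch of how test modules could be gated on the new flags (feature names are from the diff above; module names are illustrative):

    // Sketch: gating test suites on the split Cargo features.
    // Feature names come from the diff; module names are hypothetical.
    #[cfg(feature = "attention_tests_f16")]
    mod attention_f16_tests { /* f16 attention test suite */ }

    #[cfg(feature = "attention_tests_f32")]
    mod attention_f32_tests { /* f32 attention test suite */ }

    #[cfg(feature = "attention_tests_unit")]
    mod attention_unit_tests { /* unit-algorithm test suite */ }

    #[cfg(feature = "attention_tests_blackbox_accelerated")]
    mod attention_blackbox_tests { /* accelerated-algorithm test suite */ }

Since `attention_tests_all` enables all four flags, something like `cargo test -p cubecl-attention --features attention_tests_all` would run every suite, assuming the tests are gated as sketched.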

crates/cubecl-attention/src/base.rs

Lines changed: 38 additions & 54 deletions
@@ -4,11 +4,8 @@ use cubecl_std::tensor::TensorHandle;
 
 use crate::{
     components::{
-        AttentionElems, AttentionIdent, AttentionPartitionSize, AttentionProblem,
-        AttentionSelection, AttentionSetupError, AttentionStageSize, AttentionTileSize,
-        AttentionTilingScheme, AvailableLineSizes,
+        AttentionElems, AttentionIdent, AttentionProblem, AttentionSetupError, AvailableLineSizes,
         args::{TensorArgs, TensorInputsLaunch},
-        batch::HypercubeSelection,
     },
     kernels::{Algorithm, blackbox_accelerated::BlackboxAcceleratedAlgorithm, unit::UnitAlgorithm},
 };
@@ -25,15 +22,15 @@ pub enum Strategy {
 #[allow(clippy::result_large_err, clippy::too_many_arguments)]
 pub fn launch<R: Runtime>(
     strategy: &Strategy,
-    client: &ComputeClient<R::Server>,
+    client: &ComputeClient<R>,
     query: TensorHandle<R>,
     key: TensorHandle<R>,
     value: TensorHandle<R>,
     mask: Option<TensorHandle<R>>,
     out: TensorHandle<R>,
     attention_elems: AttentionElems,
 ) -> Result<(), AttentionSetupError> {
-    launch_ref::<R>(
+    launch_ref(
         strategy,
         client,
         &query.as_ref(),
@@ -48,7 +45,7 @@ pub fn launch_ref<R: Runtime>(
 #[allow(clippy::result_large_err, clippy::too_many_arguments)]
 pub fn launch_ref<R: Runtime>(
     strategy: &Strategy,
-    client: &ComputeClient<R::Server>,
+    client: &ComputeClient<R>,
     query: &TensorHandleRef<R>,
     key: &TensorHandleRef<R>,
     value: &TensorHandleRef<R>,
@@ -79,26 +76,35 @@ pub fn launch_ref<R: Runtime>(
 }
 
 pub fn launch_attention<R: Runtime, A: Algorithm>(
-    client: &ComputeClient<R::Server>,
+    client: &ComputeClient<R>,
     query: &TensorHandleRef<R>,
     key: &TensorHandleRef<R>,
     value: &TensorHandleRef<R>,
     mask: &Option<TensorHandleRef<R>>,
     out: &TensorHandleRef<R>,
     attention_elems: &AttentionElems,
 ) -> Result<(), AttentionSetupError> {
-    let line_sizes = AvailableLineSizes::from_elem_types::<R>(
-        query.elem_size,
-        attention_elems.mask.size(),
-        out.elem_size,
-    );
-    let line_sizes = A::filter_line_sizes(line_sizes)
-        .filter_with_tensor(AttentionIdent::Query, query.strides, query.shape)
-        .filter_with_tensor(AttentionIdent::Key, key.strides, key.shape)
-        .filter_with_tensor(AttentionIdent::Value, value.strides, value.shape)
-        .filter_with_tensor(AttentionIdent::Out, out.strides, out.shape)
-        .pick_max()
-        .unwrap();
+    let line_sizes = {
+        let ls = AvailableLineSizes::from_elem_types(
+            client,
+            query.elem_size,
+            attention_elems.mask.size(),
+            out.elem_size,
+        );
+        let ls = A::filter_line_sizes(ls)
+            .filter_with_tensor(AttentionIdent::Query, query.strides, query.shape)
+            .filter_with_tensor(AttentionIdent::Key, key.strides, key.shape)
+            .filter_with_tensor(AttentionIdent::Value, value.strides, value.shape)
+            .filter_with_tensor(AttentionIdent::Out, out.strides, out.shape);
+
+        if let Some(mask) = mask.as_ref() {
+            ls.filter_with_tensor(AttentionIdent::Mask, mask.strides, mask.shape)
+        } else {
+            ls
+        }
+    }
+    .pick_max()
+    .unwrap();
 
     let problem = AttentionProblem {
         batch: query.shape[0],
@@ -111,47 +117,22 @@ pub fn launch_attention<R: Runtime, A: Algorithm>(
         causal: false,
     };
 
-    let tile_size = AttentionTileSize {
-        seq_q: 8,
-        head_dim: 8,
-        seq_kv: 8,
-        val_dim: 8,
-    };
-
-    let selection = AttentionSelection {
-        hypercube_selection: HypercubeSelection {},
-        tiling_scheme: AttentionTilingScheme {
-            tile_size,
-            partition_size: AttentionPartitionSize {
-                seq_q: 1,
-                head_dim: 1,
-                seq_kv: 1,
-                val_dim: 1,
-            },
-            stage_size: AttentionStageSize { seq_q: 1 },
-        },
-        plane_dim: 32,
-        reuse_key_value: false,
-        two_rows_in_array_tile: false,
-    };
-
-    let config = BlackboxAcceleratedAlgorithm::setup::<R>(
+    let selection = A::selection(
         client,
         &problem,
-        &selection,
+        client.properties().hardware.plane_size_max,
         &line_sizes,
         attention_elems,
     )?;
 
+    let config = A::setup(client, &problem, &selection, &line_sizes, attention_elems)?;
+
     let cube_count_plan = config
         .hypercube_config()
         .cube_count_plan(&problem, &selection);
 
-    unsafe {
-        <BlackboxAcceleratedAlgorithm as Algorithm>::BatchAttention::launch_unchecked::<
-            TensorArgs,
-            R,
-        >(
+    let result = unsafe {
+        <A as Algorithm>::BatchAttention::launch_unchecked::<TensorArgs, R>(
             client,
             config.cube_dim(),
             cube_count_plan.resolve(),
@@ -167,8 +148,11 @@ pub fn launch_attention<R: Runtime, A: Algorithm>(
             cube_count_plan.as_args(),
             config,
             attention_elems,
-        );
-    }
+        )
+    };
 
-    Ok(())
+    match result {
+        Ok(_) => Ok(()),
+        Err(err) => Err(AttentionSetupError::Execution(err)),
+    }
 }
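
With this change, a failed kernel launch surfaces as `AttentionSetupError::Execution` instead of being silently discarded, and `launch_attention` is fully generic: the algorithm `A` supplies both `selection` and `setup`, where the old code hard-coded `BlackboxAcceleratedAlgorithm` and a fixed 8x8x8x8 tile selection. A minimal caller-side sketch of the new error path; the import path is assumed from this diff and may differ from the crate's actual re-exports:

    // Sketch: distinguishing the new runtime launch failure from
    // configuration failures when calling the attention entry points.
    use cubecl_attention::components::AttentionSetupError;

    fn report(result: Result<(), AttentionSetupError>) {
        match result {
            Ok(()) => {}
            // New in this commit: errors raised by the launch itself.
            Err(AttentionSetupError::Execution(launch_err)) => {
                eprintln!("attention launch failed: {launch_err:?}");
            }
            Err(setup_err) => eprintln!("attention setup failed: {setup_err:?}"),
        }
    }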

crates/cubecl-attention/src/components/batch/base.rs

Lines changed: 3 additions & 3 deletions
@@ -27,21 +27,21 @@ pub trait BatchAttentionFamily: Send + Sync + 'static {
     /// Out-of-bounds can happen
     #[allow(clippy::too_many_arguments)]
     unsafe fn launch_unchecked<'a, AA: AttentionArgs, R: Runtime>(
-        client: &ComputeClient<<R as Runtime>::Server>,
+        client: &ComputeClient<R>,
         cube_dim: CubeDim,
         cube_count: CubeCount,
         input: InputRuntimeArg<'a, AA, R>,
         output: OutputRuntimeArg<'a, AA, R>,
         cube_count_input: CubeCountInputArgs<'a, R>,
         config: Self::Config,
         dtypes: &AttentionElems,
-    );
+    ) -> Result<(), LaunchError>;
 
     /// Constructs the configuration based on the Attention problem, selection, and line sizes.
     ///
     /// This function may return an error if the configuration cannot be supported on the current runtime.
     fn setup<R: Runtime>(
-        client: &ComputeClient<R::Server>,
+        client: &ComputeClient<R>,
         problem: &AttentionProblem,
         selection: &AttentionSelection,
         line_sizes: &AttentionLineSizes,

crates/cubecl-attention/src/components/batch/simple/setup.rs

Lines changed: 6 additions & 6 deletions
@@ -1,6 +1,6 @@
 use std::marker::PhantomData;
 
-use cubecl_core::client::ComputeClient;
+use cubecl_core::{client::ComputeClient, server::LaunchError};
 
 use crate::components::{
     AttentionElems, AttentionLineSizes, AttentionPrecision, AttentionProblem, AttentionSelection,
@@ -23,13 +23,13 @@ impl<GA: GlobalAttentionFamily> BatchAttentionFamily for SimpleBatchAttentionFam
     type Config = SimpleBatchConfig<GA::Config>;
 
     fn setup<R: cubecl_core::Runtime>(
-        client: &ComputeClient<R::Server>,
+        client: &ComputeClient<R>,
         problem: &AttentionProblem,
         selection: &AttentionSelection,
         line_sizes: &AttentionLineSizes,
         dtypes: &AttentionElems,
     ) -> Result<Self::Config, crate::components::AttentionSetupError> {
-        let global_config = GA::setup::<R>(client, problem, selection, line_sizes, dtypes)?;
+        let global_config = GA::setup(client, problem, selection, line_sizes, dtypes)?;
 
         SimpleBatchConfig::new(
             global_config,
@@ -41,15 +41,15 @@ impl<GA: GlobalAttentionFamily> BatchAttentionFamily for SimpleBatchAttentionFam
     }
 
     unsafe fn launch_unchecked<'a, AA: AttentionArgs, R: cubecl_core::Runtime>(
-        client: &cubecl_core::prelude::ComputeClient<<R as cubecl_core::Runtime>::Server>,
+        client: &cubecl_core::prelude::ComputeClient<R>,
         cube_dim: cubecl_core::CubeDim,
         cube_count: cubecl_core::CubeCount,
         input: InputRuntimeArg<'a, AA, R>,
         output: OutputRuntimeArg<'a, AA, R>,
         cube_count_input: crate::components::batch::CubeCountInputArgs<'a, R>,
         config: Self::Config,
         dtypes: &AttentionElems,
-    ) {
+    ) -> Result<(), LaunchError> {
         unsafe {
             attention::launch_unchecked::<AA, Self, R>(
                 client,
@@ -60,7 +60,7 @@ impl<GA: GlobalAttentionFamily> BatchAttentionFamily for SimpleBatchAttentionFam
                 cube_count_input,
                 config,
                 dtypes.into(),
-            );
+            )
         }
     }
 }

crates/cubecl-attention/src/components/error.rs

Lines changed: 7 additions & 1 deletion
@@ -1,4 +1,4 @@
-use cubecl_core::{CubeCount, CubeDim, LineSizeError};
+use cubecl_core::{CubeCount, CubeDim, LineSizeError, server::LaunchError};
 use cubecl_matmul::components::MatmulSetupError;
 use std::fmt::{Debug, Display};
 
@@ -15,6 +15,9 @@ pub enum AttentionSetupError {
 
     /// Error in underlying matmul
     MatmulSetup(MatmulSetupError),
+
+    /// An error that happened during execution.
+    Execution(LaunchError),
 }
 
 /// A specific feature required for attention is not available in the current runtime or hardware.
@@ -75,6 +78,9 @@ impl Debug for AttentionSetupError {
             AttentionSetupError::MatmulSetup(matmul_setup_error) => {
                 writeln!(f, "{matmul_setup_error:?}")
             }
+            AttentionSetupError::Execution(error) => {
+                writeln!(f, "{error:?}")
+            }
         }
     }
 }
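
In `launch_attention` the new variant is constructed with an explicit `match` (see base.rs above). A hypothetical `From` impl, not part of this commit, would let launch results be propagated with `?` instead:

    // Hypothetical sketch, NOT in this commit: an automatic conversion
    // would allow `launch_unchecked(...)?` inside launch_attention.
    use cubecl_core::server::LaunchError;

    impl From<LaunchError> for AttentionSetupError {
        fn from(err: LaunchError) -> Self {
            AttentionSetupError::Execution(err)
        }
    }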

crates/cubecl-attention/src/components/global/base.rs

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@ pub trait GlobalAttentionFamily: Send + Sync + 'static {
     ///
     /// This function may return an error if the configuration cannot be supported on the current runtime.
     fn setup<R: Runtime>(
-        client: &ComputeClient<R::Server>,
+        client: &ComputeClient<R>,
         problem: &AttentionProblem,
         selection: &AttentionSelection,
         line_sizes: &AttentionLineSizes,
