
Commit 27f4c2e

core: safe TensorHandleRef + try_as_tensor_arg; errors improved; adopt across crates

- TensorHandleRef::{try_from_parts, try_from_typed}
- TensorHandleRef::try_as_tensor_arg (validates runtime-supported vectorization only)
- Error enums: #[non_exhaustive], Display impls; UnsupportedVectorization { requested, supported }
- Update attention/matmul/convolution/reduce/std to use try_as_tensor_arg
- Runtime tests for handle validation and unsupported factors
1 parent ae51a6f commit 27f4c2e
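
The new constructors let call sites drop the unsafe from_raw_parts / as_tensor_arg pair. A minimal usage sketch of the API summarized above; the wrapper function, the f32 element type, and the line size of 4 are illustrative choices, not part of this commit:

// Sketch: assumes Runtime, TensorHandleRef, TensorArg, and the new error types
// are in scope from cubecl-core; the Handle path matches the signatures in this
// commit. prepare_arg, f32, and the line size of 4 are illustrative only.
fn prepare_arg<R: Runtime>(
    handle: &cubecl_runtime::server::Handle,
    strides: &[usize],
    shape: &[usize],
) -> Result<(), String> {
    // Rejects rank mismatches, a zero element size, and zero strides on
    // dimensions with extent > 1, instead of trusting the caller.
    let tensor = TensorHandleRef::<R>::try_from_typed::<f32>(handle, strides, shape)
        .map_err(|e| e.to_string())?;
    // Rejects line sizes the runtime does not report as supported.
    let _arg = tensor.try_as_tensor_arg(4).map_err(|e| e.to_string())?;
    // ...pass `_arg` to a kernel launch here.
    Ok(())
}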

13 files changed (+383, -67 lines)


crates/cubecl-attention/src/base.rs

Lines changed: 10 additions & 4 deletions
@@ -114,11 +114,17 @@ pub fn launch_tmp<R: Runtime, AP: AttentionPrecision>(
         config.cube_dim(),
         cube_count_plan.resolve(),
         TensorInputsLaunch::new(
-            query.as_tensor_arg(line_sizes.query),
-            key.as_tensor_arg(line_sizes.key),
-            value.as_tensor_arg(line_sizes.value),
+            query
+                .try_as_tensor_arg(line_sizes.query)
+                .expect("valid vectorisation for query"),
+            key.try_as_tensor_arg(line_sizes.key)
+                .expect("valid vectorisation for key"),
+            value
+                .try_as_tensor_arg(line_sizes.value)
+                .expect("valid vectorisation for value"),
         ),
-        out.as_tensor_arg(line_sizes.out),
+        out.try_as_tensor_arg(line_sizes.out)
+            .expect("valid vectorisation for out"),
         cube_count_plan.as_args(),
         config,
     );

crates/cubecl-attention/src/components/args.rs

Lines changed: 10 additions & 4 deletions
@@ -521,9 +521,14 @@ impl<EG: Numeric> ConcreteInputsFactory for TensorInputs<EG> {
         line_sizes: &AttentionLineSizes,
     ) -> Self::RuntimeArg<'a, R> {
         TensorInputsLaunch::new(
-            query.as_tensor_arg(line_sizes.query),
-            key.as_tensor_arg(line_sizes.key),
-            value.as_tensor_arg(line_sizes.value),
+            query
+                .try_as_tensor_arg(line_sizes.query)
+                .expect("valid vectorisation for query"),
+            key.try_as_tensor_arg(line_sizes.key)
+                .expect("valid vectorisation for key"),
+            value
+                .try_as_tensor_arg(line_sizes.value)
+                .expect("valid vectorisation for value"),
             // mask.as_tensor_arg(line_sizes.value),
         )
     }
@@ -536,7 +541,8 @@ impl<EG: Numeric> ConcreteOutputFactory for Tensor<Line<EG>> {
         _problem: &AttentionProblem,
         line_sizes: &AttentionLineSizes,
     ) -> Self::RuntimeArg<'a, R> {
-        out.as_tensor_arg(line_sizes.out)
+        out.try_as_tensor_arg(line_sizes.out)
+            .expect("valid vectorisation for out")
     }
 }
crates/cubecl-convolution/src/components/global/args.rs

Lines changed: 25 additions & 8 deletions
@@ -38,11 +38,23 @@ impl<Lhs: Numeric, Rhs: Numeric, EO: Numeric> ConcreteInputsFactory for TensorIn
         line_sizes: &MatmulLineSizes,
     ) -> Self::RuntimeArg<'a, R> {
         TensorInputsLaunch::new(
-            lhs.data().as_tensor_arg(line_sizes.lhs),
-            lhs.scale().map(|it| it.as_tensor_arg(1)).into(),
-            rhs.data().as_tensor_arg(line_sizes.rhs),
-            rhs.scale().map(|it| it.as_tensor_arg(1)).into(),
-            bias.map(|it| it.as_tensor_arg(line_sizes.out)).into(),
+            lhs.data()
+                .try_as_tensor_arg(line_sizes.lhs)
+                .expect("valid vec lhs"),
+            lhs.scale()
+                .map(|it| it.try_as_tensor_arg(1).expect("vec=1"))
+                .into(),
+            rhs.data()
+                .try_as_tensor_arg(line_sizes.rhs)
+                .expect("valid vec rhs"),
+            rhs.scale()
+                .map(|it| it.try_as_tensor_arg(1).expect("vec=1"))
+                .into(),
+            bias.map(|it| {
+                it.try_as_tensor_arg(line_sizes.out)
+                    .expect("valid vec out")
+            })
+            .into(),
         )
     }
 }
@@ -104,7 +116,9 @@ impl<Lhs: Numeric, Rhs: Numeric, EO: Numeric> ConcreteInputsFactory
                 channels_per_pixel: tile_size_k,
                 pixels_per_column: stage_m,
             },
-            lhs.data().as_tensor_arg(line_sizes.lhs),
+            lhs.data()
+                .try_as_tensor_arg(line_sizes.lhs)
+                .expect("valid vec lhs"),
             lhs_elem,
         )
         .with_elem_stride(elem_stride)
@@ -114,12 +128,15 @@ impl<Lhs: Numeric, Rhs: Numeric, EO: Numeric> ConcreteInputsFactory
             TensorMapFormat::Tiled {
                 tile_size: stage_size_rhs,
             },
-            rhs.data().as_tensor_arg(1),
+            rhs.data().try_as_tensor_arg(1).expect("vec=1"),
             Rhs::as_type_native_unchecked(),
         )
         .with_prefetch(prefetch_rhs);

-        let bias = bias.map(|it| it.as_tensor_arg(line_sizes.out));
+        let bias = bias.map(|it| {
+            it.try_as_tensor_arg(line_sizes.out)
+                .expect("valid vec out")
+        });

         // TODO: Think about how to handle scales with TMA
         TensorMapInputsLaunch::new(lhs, rhs, bias.into())

crates/cubecl-convolution/src/tests/convolution_test_launcher.rs

Lines changed: 6 additions & 9 deletions
@@ -88,15 +88,12 @@ pub fn test_convolution_algorithm<A, Args, P, R>(
     }

     let elem_size = size_of::<P::EG>();
-    let lhs_handle = unsafe {
-        TensorHandleRef::from_raw_parts(&lhs.handle, &lhs.strides, &lhs.shape, elem_size)
-    };
-    let rhs_handle = unsafe {
-        TensorHandleRef::from_raw_parts(&rhs.handle, &rhs.strides, &rhs.shape, elem_size)
-    };
-    let out_handle = unsafe {
-        TensorHandleRef::from_raw_parts(&out.handle, &out.strides, &out.shape, elem_size)
-    };
+    let lhs_handle = TensorHandleRef::<R>::try_from_parts(&lhs.handle, &lhs.strides, &lhs.shape, elem_size)
+        .expect("valid lhs handle");
+    let rhs_handle = TensorHandleRef::<R>::try_from_parts(&rhs.handle, &rhs.strides, &rhs.shape, elem_size)
+        .expect("valid rhs handle");
+    let out_handle = TensorHandleRef::<R>::try_from_parts(&out.handle, &out.strides, &out.shape, elem_size)
+        .expect("valid out handle");

     let lhs_handle = A::into_tensor_handle::<R, P::EG>(&client, &lhs_handle, MatmulIdent::Lhs);
     let rhs_handle = A::into_tensor_handle::<R, P::EG>(&client, &rhs_handle, MatmulIdent::Rhs);
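
Since the element type P::EG is statically known here, the same handles could also be built with the new try_from_typed constructor, which derives the element size from the type; a sketch under the assumption that P::EG satisfies the CubePrimitive bound the constructor requires:

// Sketch only: an equivalent construction using try_from_typed, assuming
// P::EG meets the CubePrimitive bound required by the new constructor.
let lhs_handle =
    TensorHandleRef::<R>::try_from_typed::<P::EG>(&lhs.handle, &lhs.strides, &lhs.shape)
        .expect("valid lhs handle");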

crates/cubecl-core/src/frontend/container/tensor/launch.rs

Lines changed: 108 additions & 2 deletions
@@ -13,6 +13,33 @@ use crate::{

 use super::Tensor;

+/// Errors that can occur when constructing a tensor handle safely.
+#[non_exhaustive]
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum TensorHandleError {
+    /// Rank of shape and strides differ.
+    RankMismatch {
+        shape_rank: usize,
+        stride_rank: usize,
+    },
+    /// Element size must be > 0.
+    ElemSizeZero,
+    /// A stride is zero for a dimension with extent > 1.
+    ZeroStride { axis: usize },
+}
+
+/// Errors that can occur when converting a handle to a runtime tensor argument.
+#[non_exhaustive]
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum TensorArgError {
+    /// Requested vectorization factor is not supported by the runtime.
+    UnsupportedVectorization { requested: u8, supported: &'static [u8] },
+    /// Inner-most dimension is not contiguous (stride != 1) while vectorization > 1.
+    NonContiguousInner,
+    /// Inner-most dimension is not divisible by the vectorization factor.
+    MisalignedVectorization { last_dim: usize, factor: u8 },
+}
+
 /// Argument to be used for [tensors](Tensor) passed as arguments to kernels.
 #[derive(Debug)]
 pub enum TensorArg<'a, R: Runtime> {
@@ -178,17 +205,33 @@ impl<R: Runtime> ArgSettings<R> for TensorArg<'_, R> {

 impl<'a, R: Runtime> TensorHandleRef<'a, R> {
     /// Convert the handle into a [tensor argument](TensorArg).
-    pub fn as_tensor_arg(&'a self, vectorisation: u8) -> TensorArg<'a, R> {
+    pub fn as_tensor_arg(&'a self, vectorization: u8) -> TensorArg<'a, R> {
         unsafe {
             TensorArg::from_raw_parts_and_size(
                 self.handle,
                 self.strides,
                 self.shape,
-                vectorisation,
+                vectorization,
                 self.elem_size,
             )
         }
     }
+
+    /// Try to convert the handle into a [tensor argument](TensorArg), validating that the
+    /// requested vectorization factor is supported by the runtime. This does not enforce
+    /// inner-most contiguity or alignment requirements, as kernels may legally vectorize
+    /// along axes other than the innermost.
+    pub fn try_as_tensor_arg(
+        &'a self,
+        vectorization: u8,
+    ) -> Result<TensorArg<'a, R>, TensorArgError> {
+        if !R::supported_line_sizes().contains(&vectorization) {
+            return Err(TensorArgError::UnsupportedVectorization {
+                requested: vectorization,
+                supported: R::supported_line_sizes(),
+            });
+        }
+        Ok(self.as_tensor_arg(vectorization))
+    }
+
     /// Create a handle from raw parts.
     ///
     /// # Safety
@@ -209,4 +252,67 @@ impl<'a, R: Runtime> TensorHandleRef<'a, R> {
             runtime: PhantomData,
         }
     }
+
+    /// Safely create a tensor handle from raw parts with basic shape/stride validation.
+    pub fn try_from_parts(
+        handle: &'a cubecl_runtime::server::Handle,
+        strides: &'a [usize],
+        shape: &'a [usize],
+        elem_size: usize,
+    ) -> Result<Self, TensorHandleError> {
+        if shape.len() != strides.len() {
+            return Err(TensorHandleError::RankMismatch {
+                shape_rank: shape.len(),
+                stride_rank: strides.len(),
+            });
+        }
+        if elem_size == 0 {
+            return Err(TensorHandleError::ElemSizeZero);
+        }
+        // Disallow zero strides when the corresponding dimension extent is > 1
+        // (broadcast dims with extent 1 are allowed).
+        for (i, (&s, &d)) in strides.iter().zip(shape.iter()).enumerate() {
+            if s == 0 && d > 1 {
+                return Err(TensorHandleError::ZeroStride { axis: i });
+            }
+        }
+        Ok(unsafe { Self::from_raw_parts(handle, strides, shape, elem_size) })
+    }
+
+    /// Safely create a tensor handle from raw parts using the element type for size.
+    pub fn try_from_typed<E: CubePrimitive>(
+        handle: &'a cubecl_runtime::server::Handle,
+        strides: &'a [usize],
+        shape: &'a [usize],
+    ) -> Result<Self, TensorHandleError> {
+        let elem_size = E::size().expect("Element should have a size");
+        Self::try_from_parts(handle, strides, shape, elem_size)
+    }
+}
+
+impl core::fmt::Display for TensorHandleError {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        match self {
+            TensorHandleError::RankMismatch { shape_rank, stride_rank } => {
+                write!(f, "rank mismatch (shape={}, strides={})", shape_rank, stride_rank)
+            }
+            TensorHandleError::ElemSizeZero => write!(f, "element size must be > 0"),
+            TensorHandleError::ZeroStride { axis } => {
+                write!(f, "zero stride on axis {} with extent > 1", axis)
+            }
+        }
+    }
+}
+
+impl core::fmt::Display for TensorArgError {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        match self {
+            TensorArgError::UnsupportedVectorization { requested, supported } => {
+                write!(f, "unsupported vectorization {}, supported: {:?}", requested, supported)
+            }
+            TensorArgError::NonContiguousInner => {
+                write!(f, "non-contiguous innermost dimension for vectorized access")
+            }
+            TensorArgError::MisalignedVectorization { last_dim, factor } => write!(
+                f,
+                "innermost dimension {} not divisible by vectorization {}",
+                last_dim, factor
+            ),
+        }
+    }
 }
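
Because both error types carry data and implement Display, callers can branch on them instead of panicking. A sketch of falling back to an unvectorized argument when the requested factor is rejected; the helper name and the fallback policy are illustrative, not part of this commit:

// Sketch: fall back to a line size of 1 when the requested factor is rejected;
// pick_arg is an illustrative helper, not an API added by this commit.
fn pick_arg<'a, R: Runtime>(
    tensor: &'a TensorHandleRef<'a, R>,
    preferred: u8,
) -> TensorArg<'a, R> {
    match tensor.try_as_tensor_arg(preferred) {
        Ok(arg) => arg,
        Err(TensorArgError::UnsupportedVectorization { requested, supported }) => {
            // Assumes a line size of 1 is always reported as supported.
            eprintln!("line size {requested} unsupported ({supported:?}); falling back to 1");
            tensor.as_tensor_arg(1)
        }
        // TensorArgError is #[non_exhaustive]; treat anything else as fatal.
        Err(other) => panic!("{other}"),
    }
}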

crates/cubecl-core/src/runtime_tests/mod.rs

Lines changed: 2 additions & 0 deletions
@@ -21,6 +21,7 @@ pub mod sequence;
 pub mod slice;
 pub mod synchronization;
 pub mod tensor;
+pub mod tensor_handle;
 pub mod tensormap;
 pub mod to_client;
 pub mod topology;
@@ -138,6 +139,7 @@ macro_rules! testgen_untyped {
         cubecl_core::testgen_comparison!();

         cubecl_core::testgen_to_client!();
+        cubecl_core::testgen_tensor_handle!();
     };
 }
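
The tensor_handle test module registered here is not included in this excerpt. A rough sketch of the kind of unsupported-factor check the commit message describes; every name and value below is hypothetical:

// Hypothetical sketch; the actual tensor_handle runtime test added by this
// commit is not shown in the excerpt. Assumes ComputeClient, TensorHandleRef,
// TensorArgError, and the CubeElement as_bytes helper are in scope, and that
// a line size of 3 is not in R::supported_line_sizes() for the runtime under test.
pub fn test_unsupported_line_size<R: Runtime>(client: ComputeClient<R::Server, R::Channel>) {
    let handle = client.create(f32::as_bytes(&[0.0; 8]));
    let (shape, strides) = ([8usize], [1usize]);
    let tensor = TensorHandleRef::<R>::try_from_parts(&handle, &strides, &shape, 4)
        .expect("rank, strides and element size are valid");
    assert!(matches!(
        tensor.try_as_tensor_arg(3),
        Err(TensorArgError::UnsupportedVectorization { requested: 3, .. })
    ));
}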
