tracel-ai
diff --git a/crates/cubecl-common/src/float/fp8/fp8_e8m0.rs b/crates/cubecl-common/src/float/fp8/fp8_e8m0.rs
Lines changed: 76 additions & 69 deletions
diff --git a/crates/cubecl-core/src/runtime_tests/launch.rs b/crates/cubecl-core/src/runtime_tests/launch.rs
Lines changed: 2 additions & 2 deletions
diff --git a/crates/cubecl-macros/src/generate/cube_type/generate_struct.rs b/crates/cubecl-macros/src/generate/cube_type/generate_struct.rs
Lines changed: 2 additions & 2 deletions
diff --git a/crates/cubecl-quant/src/dequantize.rs b/crates/cubecl-quant/src/dequantize.rs
Lines changed: 23 additions & 23 deletions
@@ -1,11 +1,6 @@
-use core::{
-    fmt::Display,
-    ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign},
-};
+use core::fmt::Display;
 
 use bytemuck::{Pod, Zeroable};
-use float4::E8M0;
-use num_traits::{NumCast, ToPrimitive};
 
 /// An 8-bit unsigned floating point type with 8 exponent bits and no mantissa bits.
 /// Used for scaling factors.
@@ -37,6 +32,7 @@ impl ue8m0 {
     /// other values are truncated and rounded to the nearest representable value.
     #[inline]
     #[must_use]
+    #[cfg(feature = "float4")]
     pub fn from_f32(value: f32) -> ue8m0 {
         Self::from_f64(value as f64)
     }
@@ -49,8 +45,9 @@ impl ue8m0 {
     /// values are truncated and rounded to the nearest representable value.
     #[inline]
     #[must_use]
+    #[cfg(feature = "float4")]
     pub fn from_f64(value: f64) -> ue8m0 {
-        ue8m0(E8M0::from_f64(value).to_bits())
+        ue8m0(float4::E8M0::from_f64(value).to_bits())
     }
 
     /// Converts a [`ue8m0`] into the underlying bit representation.
@@ -65,6 +62,7 @@ impl ue8m0 {
     /// This conversion is lossless as all values can be represented exactly in [`f32`].
     #[inline]
     #[must_use]
+    #[cfg(feature = "float4")]
     pub fn to_f32(self) -> f32 {
         self.to_f64() as f32
     }
@@ -74,101 +72,110 @@ impl ue8m0 {
     /// This conversion is lossless as all values can be represented exactly in [`f64`].
     #[inline]
     #[must_use]
+    #[cfg(feature = "float4")]
     pub fn to_f64(self) -> f64 {
-        E8M0::from_bits(self.0).to_f64()
+        float4::E8M0::from_bits(self.0).to_f64()
     }
 }
 
-impl Neg for ue8m0 {
-    type Output = Self;
-
-    fn neg(self) -> Self::Output {
-        Self::from_f32(self.to_f32().neg())
+impl Display for ue8m0 {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        write!(f, "{}", self.0)
     }
 }
 
-impl Mul for ue8m0 {
-    type Output = Self;
+#[cfg(feature = "float4")]
+mod numeric {
+    use num_traits::{NumCast, ToPrimitive};
 
-    fn mul(self, rhs: Self) -> Self::Output {
-        Self::from_f32(self.to_f32() * rhs.to_f32())
-    }
-}
+    use super::*;
+    use core::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign};
 
-impl MulAssign for ue8m0 {
-    fn mul_assign(&mut self, rhs: Self) {
-        *self = *self * rhs;
+    impl Neg for ue8m0 {
+        type Output = Self;
+
+        fn neg(self) -> Self::Output {
+            Self::from_f32(self.to_f32().neg())
+        }
     }
-}
 
-impl Div for ue8m0 {
-    type Output = Self;
+    impl Mul for ue8m0 {
+        type Output = Self;
 
-    fn div(self, rhs: Self) -> Self::Output {
-        Self::from_f32(self.to_f32() / rhs.to_f32())
+        fn mul(self, rhs: Self) -> Self::Output {
+            Self::from_f32(self.to_f32() * rhs.to_f32())
+        }
     }
-}
 
-impl DivAssign for ue8m0 {
-    fn div_assign(&mut self, rhs: Self) {
-        *self = *self / rhs;
+    impl MulAssign for ue8m0 {
+        fn mul_assign(&mut self, rhs: Self) {
+            *self = *self * rhs;
+        }
     }
-}
 
-impl Add for ue8m0 {
-    type Output = Self;
+    impl Div for ue8m0 {
+        type Output = Self;
 
-    fn add(self, rhs: Self) -> Self::Output {
-        Self::from_f32(self.to_f32() + rhs.to_f32())
+        fn div(self, rhs: Self) -> Self::Output {
+            Self::from_f32(self.to_f32() / rhs.to_f32())
+        }
     }
-}
 
-impl AddAssign for ue8m0 {
-    fn add_assign(&mut self, rhs: Self) {
-        *self = *self + rhs;
+    impl DivAssign for ue8m0 {
+        fn div_assign(&mut self, rhs: Self) {
+            *self = *self / rhs;
+        }
     }
-}
 
-impl Sub for ue8m0 {
-    type Output = Self;
+    impl Add for ue8m0 {
+        type Output = Self;
 
-    fn sub(self, rhs: Self) -> Self::Output {
-        Self::from_f32(self.to_f32() - rhs.to_f32())
+        fn add(self, rhs: Self) -> Self::Output {
+            Self::from_f32(self.to_f32() + rhs.to_f32())
+        }
     }
-}
 
-impl SubAssign for ue8m0 {
-    fn sub_assign(&mut self, rhs: Self) {
-        *self = *self - rhs;
+    impl AddAssign for ue8m0 {
+        fn add_assign(&mut self, rhs: Self) {
+            *self = *self + rhs;
+        }
     }
-}
 
-impl ToPrimitive for ue8m0 {
-    fn to_i64(&self) -> Option<i64> {
-        Some(ue8m0::to_f32(*self) as i64)
-    }
+    impl Sub for ue8m0 {
+        type Output = Self;
 
-    fn to_u64(&self) -> Option<u64> {
-        Some(ue8m0::to_f64(*self) as u64)
+        fn sub(self, rhs: Self) -> Self::Output {
+            Self::from_f32(self.to_f32() - rhs.to_f32())
+        }
     }
 
-    fn to_f32(&self) -> Option<f32> {
-        Some(ue8m0::to_f32(*self))
+    impl SubAssign for ue8m0 {
+        fn sub_assign(&mut self, rhs: Self) {
+            *self = *self - rhs;
+        }
     }
 
-    fn to_f64(&self) -> Option<f64> {
-        Some(ue8m0::to_f64(*self))
-    }
-}
+    impl ToPrimitive for ue8m0 {
+        fn to_i64(&self) -> Option<i64> {
+            Some(ue8m0::to_f32(*self) as i64)
+        }
+
+        fn to_u64(&self) -> Option<u64> {
+            Some(ue8m0::to_f64(*self) as u64)
+        }
 
-impl NumCast for ue8m0 {
-    fn from<T: num_traits::ToPrimitive>(n: T) -> Option<Self> {
-        Some(Self::from_f32(n.to_f32()?))
+        fn to_f32(&self) -> Option<f32> {
+            Some(ue8m0::to_f32(*self))
+        }
+
+        fn to_f64(&self) -> Option<f64> {
+            Some(ue8m0::to_f64(*self))
+        }
     }
-}
 
-impl Display for ue8m0 {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        write!(f, "{}", self.0)
+    impl NumCast for ue8m0 {
+        fn from<T: num_traits::ToPrimitive>(n: T) -> Option<Self> {
+            Some(Self::from_f32(n.to_f32()?))
+        }
     }
 }
@@ -61,7 +61,7 @@ pub fn test_kernel_with_comptime_tag<R: Runtime>(client: ComputeClient<R::Server
         &client,
         CubeCount::Static(1, 1, 1),
         CubeDim::default(),
-        ComptimeTagLaunch::new(array_arg, &"zero".to_string()),
+        ComptimeTagLaunch::new(array_arg, "zero".to_string()),
     );
 
     let actual = client.read_one(handle);
@@ -76,7 +76,7 @@ pub fn test_kernel_with_comptime_tag<R: Runtime>(client: ComputeClient<R::Server
         &client,
         CubeCount::Static(1, 1, 1),
         CubeDim::default(),
-        ComptimeTagLaunch::new(array_arg, &"not_zero".to_string()),
+        ComptimeTagLaunch::new(array_arg, "not_zero".to_string()),
     );
 
     let actual = client.read_one(handle);
 
@@ -375,7 +375,7 @@ impl TypeField {
         if !self.comptime.is_present() {
             quote![#vis #name: <#ty as #launch_arg>::RuntimeArg<'a, R>]
         } else {
-            quote![#vis #name: &'a #ty]
+            quote![#vis #name: #ty]
         }
     }
 
@@ -387,7 +387,7 @@ impl TypeField {
         if !self.comptime.is_present() {
             quote![#name: <#ty as #launch_arg>::RuntimeArg<'a, R>]
         } else {
-            quote![#name: &'a #ty]
+            quote![#name: #ty]
         }
     }
 
 
@@ -31,7 +31,7 @@ pub fn dequantize_symmetric_packed_values<F: Float, FS: CubePrimitive, QI: Int>(
     position: u32,
     values: &View<Line<QI>, u32>,
     scales: &View<FS, u32>,
-    #[comptime] scheme: &QuantScheme,
+    #[comptime] scheme: QuantScheme,
 ) -> Array<Line<F>> {
     dequantize_symmetric_packed_value_at::<F, FS, QI>(position, values[position], scales, scheme)
 }
@@ -45,7 +45,7 @@ pub fn dequantize_symmetric_packed_value_at<F: Float, FS: CubePrimitive, QI: Int
     position: u32,
     values: Line<QI>,
     scales: &View<FS, u32>,
-    #[comptime] scheme: &QuantScheme,
+    #[comptime] scheme: QuantScheme,
 ) -> Array<Line<F>> {
     dequantize_symmetric_packed_value::<F, FS, QI>(values, scales, position, scheme)
 }
@@ -59,7 +59,7 @@ pub fn dequantize_symmetric_packed_value<F: Float, FS: CubePrimitive, QS: Int>(
     values: Line<QS>,
     scales: &View<FS, u32>,
     position: u32,
-    #[comptime] scheme: &QuantScheme,
+    #[comptime] scheme: QuantScheme,
 ) -> Array<Line<F>> {
     let line_size_values = values.line_size();
     let num_quants = comptime!(scheme.num_quants() as u32);
@@ -120,7 +120,7 @@ fn dequantize_symmetric_packed_kernel<F: Float, FS: CubePrimitive>(
     input: &LinearView<Line<u32>>,
     scales: &ScalesView<FS>,
     output: &mut LinearView<Line<F>, ReadWrite>,
-    #[comptime] scheme: &QuantScheme,
+    #[comptime] scheme: QuantScheme,
 ) {
     if !input.is_in_bounds(ABSOLUTE_POS) {
         terminate!();
@@ -177,19 +177,19 @@ pub fn launch_ref<R: Runtime, F: Float>(
             ..
         } => match scheme.param {
             QuantParam::F32 => {
-                dequantize_packed::<R, F, f32>(client, values, scheme, params, output)
+                dequantize_packed::<R, F, f32>(client, values, *scheme, params, output)
             }
             QuantParam::F16 => {
-                dequantize_packed::<R, F, f16>(client, values, scheme, params, output)
+                dequantize_packed::<R, F, f16>(client, values, *scheme, params, output)
             }
             QuantParam::BF16 => {
-                dequantize_packed::<R, F, bf16>(client, values, scheme, params, output)
+                dequantize_packed::<R, F, bf16>(client, values, *scheme, params, output)
             }
             QuantParam::UE8M0 => {
-                dequantize_packed::<R, F, ue8m0>(client, values, scheme, params, output)
+                dequantize_packed::<R, F, ue8m0>(client, values, *scheme, params, output)
             }
             QuantParam::UE4M3 => {
-                dequantize_packed::<R, F, e4m3>(client, values, scheme, params, output)
+                dequantize_packed::<R, F, e4m3>(client, values, *scheme, params, output)
             }
         },
         QuantScheme {
@@ -211,19 +211,19 @@ pub fn launch_ref<R: Runtime, F: Float>(
 
             match scheme.param {
                 QuantParam::F32 => {
-                    dequantize_native::<R, F, f32>(client, values, scheme, params, output)
+                    dequantize_native::<R, F, f32>(client, values, *scheme, params, output)
                 }
                 QuantParam::F16 => {
-                    dequantize_native::<R, F, f16>(client, values, scheme, params, output)
+                    dequantize_native::<R, F, f16>(client, values, *scheme, params, output)
                 }
                 QuantParam::BF16 => {
-                    dequantize_native::<R, F, bf16>(client, values, scheme, params, output)
+                    dequantize_native::<R, F, bf16>(client, values, *scheme, params, output)
                 }
                 QuantParam::UE8M0 => {
-                    dequantize_native::<R, F, ue8m0>(client, values, scheme, params, output)
+                    dequantize_native::<R, F, ue8m0>(client, values, *scheme, params, output)
                 }
                 QuantParam::UE4M3 => {
-                    dequantize_native::<R, F, e4m3>(client, values, scheme, params, output)
+                    dequantize_native::<R, F, e4m3>(client, values, *scheme, params, output)
                 }
             }
         }
@@ -240,7 +240,7 @@ pub fn launch_ref<R: Runtime, F: Float>(
 fn dequantize_packed<R: Runtime, F: Float, FS: CubePrimitive>(
     client: &ComputeClient<R::Server, R::Channel>,
     input: &TensorHandleRef<R>,
-    scheme: &QuantScheme,
+    scheme: QuantScheme,
     scale: &TensorHandleRef<'_, R>,
     output: &TensorHandleRef<R>,
 ) {
@@ -276,10 +276,10 @@ fn dequantize_packed<R: Runtime, F: Float, FS: CubePrimitive>(
                     client,
                     cube_count,
                     cube_dim,
-                    linear_view(client, input, &line_size_in),
-                    scales_view(client, input, scale, &1, scheme),
-                    linear_view(client, output, &line_size_out),
-                    scheme.clone(),
+                    linear_view(client, input, line_size_in),
+                    scales_view(client, input, scale, 1, &scheme),
+                    linear_view(client, output, line_size_out),
+                    scheme,
                 )
             };
         }
@@ -290,7 +290,7 @@ fn dequantize_packed<R: Runtime, F: Float, FS: CubePrimitive>(
 fn dequantize_native<R: Runtime, F: Float, FS: CubePrimitive>(
     client: &ComputeClient<R::Server, R::Channel>,
     input: &TensorHandleRef<R>,
-    scheme: &QuantScheme,
+    scheme: QuantScheme,
     scale: &TensorHandleRef<'_, R>,
     output: &TensorHandleRef<R>,
 ) {
@@ -333,9 +333,9 @@ fn dequantize_native<R: Runtime, F: Float, FS: CubePrimitive>(
                     client,
                     cube_count,
                     cube_dim,
-                    linear_view(client, input, &line_size),
-                    scales_view(client, input, scale, &1, scheme),
-                    linear_view(client, output, &line_size),
+                    linear_view(client, input, line_size),
+                    scales_view(client, input, scale, 1, &scheme),
+                    linear_view(client, output, line_size),
                 )
             };
         }
Original file line number	Diff line number	Diff line change
`@@ -375,7 +375,7 @@ impl TypeField {`
`375`	`375`	`if !self.comptime.is_present() {`
`376`	`376`	`quote![#vis #name: <#ty as #launch_arg>::RuntimeArg<'a, R>]`
`377`	`377`	`} else {`
`378`		`- quote![#vis #name: &'a #ty]`
	`378`	`+ quote![#vis #name: #ty]`
`379`	`379`	`}`
`380`	`380`	`}`
`381`	`381`
`@@ -387,7 +387,7 @@ impl TypeField {`
`387`	`387`	`if !self.comptime.is_present() {`
`388`	`388`	`quote![#name: <#ty as #launch_arg>::RuntimeArg<'a, R>]`
`389`	`389`	`} else {`
`390`		`- quote![#name: &'a #ty]`
	`390`	`+ quote![#name: #ty]`
`391`	`391`	`}`
`392`	`392`	`}`
`393`	`393`