tracel-ai
diff --git a/‎crates/cubecl-core/src/frontend/mod.rs‎
Lines changed: 2 additions & 0 deletions b/‎crates/cubecl-core/src/frontend/mod.rs‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎crates/cubecl-core/src/frontend/trigonometry.rs‎
Lines changed: 74 additions & 0 deletions b/‎crates/cubecl-core/src/frontend/trigonometry.rs‎
Lines changed: 74 additions & 0 deletions
diff --git a/‎crates/cubecl-cpu/src/compiler/mod.rs‎
Lines changed: 6 additions & 1 deletion b/‎crates/cubecl-cpu/src/compiler/mod.rs‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎crates/cubecl-cpu/src/compiler/passes/mod.rs‎
Lines changed: 1 addition & 0 deletions b/‎crates/cubecl-cpu/src/compiler/passes/mod.rs‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎crates/cubecl-cpu/src/compiler/passes/trigonometries_transform.rs‎
Lines changed: 37 additions & 0 deletions b/‎crates/cubecl-cpu/src/compiler/passes/trigonometries_transform.rs‎
Lines changed: 37 additions & 0 deletions
diff --git a/‎crates/cubecl-cpu/src/compiler/visitor/operation/arithmetic.rs‎
Lines changed: 5 additions & 78 deletions b/‎crates/cubecl-cpu/src/compiler/visitor/operation/arithmetic.rs‎
Lines changed: 5 additions & 78 deletions
diff --git a/‎crates/cubecl-opt/src/passes/constant_prop.rs‎
Lines changed: 5 additions & 3 deletions b/‎crates/cubecl-opt/src/passes/constant_prop.rs‎
Lines changed: 5 additions & 3 deletions
diff --git a/‎crates/cubecl-spirv/src/arithmetic.rs‎
Lines changed: 4 additions & 80 deletions b/‎crates/cubecl-spirv/src/arithmetic.rs‎
Lines changed: 4 additions & 80 deletions
diff --git a/‎crates/cubecl-spirv/src/compiler.rs‎
Lines changed: 3 additions & 1 deletion b/‎crates/cubecl-spirv/src/compiler.rs‎
Lines changed: 3 additions & 1 deletion
@@ -16,6 +16,7 @@ mod options;
 mod plane;
 mod polyfills;
 mod topology;
+mod trigonometry;
 
 pub use branch::{RangeExpand, SteppedRangeExpand, range, range_stepped};
 pub use const_expand::*;
@@ -29,5 +30,6 @@ pub use options::*;
 pub use plane::*;
 pub use polyfills::*;
 pub use topology::*;
+pub use trigonometry::*;
 
 pub use crate::{debug_print, debug_print_expand};
@@ -0,0 +1,74 @@
+use cubecl_ir::{ExpandElement, Variable};
+
+use crate::prelude::*;
+use crate::{self as cubecl};
+
+/// Computes the hypotenuse of a right triangle given the lengths of the other two sides.
+///
+/// This function computes `sqrt(lhs² + rhs²)` as `max * sqrt(1 + (min / max)²)`, scaling by the
+/// larger magnitude so the squared ratio stays at most 1 and cannot overflow.
+///
+/// # Arguments
+///
+/// * `lhs` - Length of one side
+/// * `rhs` - Length of the other side
+///
+/// # Returns
+///
+/// The length of the hypotenuse
+///
+/// # Example
+///
+/// ```rust,ignore
+/// let hyp = hypot(F::new(3.0), F::new(4.0));
+/// assert!((hyp - F::new(5.0)).abs() < F::new(1e-6));
+/// ```
+#[cube]
+pub fn hypot<F: Float>(lhs: F, rhs: F) -> F {
+    let one = F::from_int(1);
+    let a = F::abs(lhs);
+    let b = F::abs(rhs);
+    let max_val = F::max(a, b);
+    let max_val_is_zero = max_val == F::from_int(0);
+    let max_val_safe = select(max_val_is_zero, one, max_val); // divide by 1, not 0, for (0, 0)
+    let min_val = F::min(a, b);
+    let t = min_val / max_val_safe;
+
+    max_val * F::sqrt(one + (t * t))
+}
+
+#[allow(missing_docs)] // IR-level helper: expands `hypot` into `scope` and assigns it to `out`.
+pub fn expand_hypot(scope: &mut Scope, lhs: Variable, rhs: Variable, out: Variable) {
+    scope.register_type::<FloatExpand<0>>(lhs.ty.storage_type()); // register lhs's storage type
+    let res = hypot::expand::<FloatExpand<0>>(
+        scope,
+        ExpandElement::Plain(lhs).into(),
+        ExpandElement::Plain(rhs).into(),
+    );
+    assign::expand_no_check(scope, res, ExpandElement::Plain(out).into());
+}
+
+#[cube] // Reciprocal of `hypot`: computes `inverse_sqrt(1 + (min / max)²) / max`.
+pub fn rhypot<F: Float>(lhs: F, rhs: F) -> F {
+    let one = F::from_int(1);
+    let a = F::abs(lhs);
+    let b = F::abs(rhs);
+    let max_val = F::max(a, b);
+    let max_val_is_zero = max_val == F::from_int(0);
+    let max_val_safe = select(max_val_is_zero, one, max_val); // divide by 1, not 0, for (0, 0)
+    let min_val = F::min(a, b);
+    let t = min_val / max_val_safe;
+
+    F::inverse_sqrt(one + (t * t)) / max_val // max_val is unguarded here: (0, 0) gives 1 / 0
+}
+
+#[allow(missing_docs)] // IR-level helper: expands `rhypot` into `scope` and assigns it to `out`.
+pub fn expand_rhypot(scope: &mut Scope, lhs: Variable, rhs: Variable, out: Variable) {
+    scope.register_type::<FloatExpand<0>>(lhs.ty.storage_type()); // register lhs's storage type
+    let res = rhypot::expand::<FloatExpand<0>>(
+        scope,
+        ExpandElement::Plain(lhs).into(),
+        ExpandElement::Plain(rhs).into(),
+    );
+    assign::expand_no_check(scope, res, ExpandElement::Plain(out).into());
+}
@@ -23,7 +23,10 @@ use cubecl_core::{
 use cubecl_opt::OptimizerBuilder;
 use mlir_engine::MlirEngine;
 
-use crate::compiler::passes::erf_transform::ErfTransform;
+use crate::compiler::passes::{
+    erf_transform::ErfTransform,
+    trigonometries_transform::{HypotTransform, RhypotTransform},
+};
 
 #[derive(Clone, Debug, Default)]
 pub struct MlirCompiler {}
@@ -46,6 +49,8 @@ impl Compiler for MlirCompiler {
         dump_scope(&kernel.body, &kernel.options.kernel_name);
         let opt = OptimizerBuilder::default()
             .with_transformer(ErfTransform)
+            .with_transformer(HypotTransform)
+            .with_transformer(RhypotTransform)
             .with_processor(CheckedIoProcessor::new(mode))
             .with_processor(SaturatingArithmeticProcessor::new(true))
             .with_processor(PredicateProcessor)
 
@@ -1,2 +1,3 @@
 pub mod erf_transform;
 pub mod shared_memories;
+pub mod trigonometries_transform;
@@ -0,0 +1,37 @@
+use cubecl_core::{
+    ir::{Arithmetic, Instruction, Operation, Scope},
+    prelude::*,
+};
+use cubecl_opt::{IrTransformer, TransformAction};
+
+#[derive(Debug)] // IR transformer that replaces `Arithmetic::Hypot` with its expanded form.
+pub(crate) struct HypotTransform;
+
+impl IrTransformer for HypotTransform {
+    fn maybe_transform(&self, scope: &mut Scope, inst: &Instruction) -> TransformAction {
+        match &inst.operation {
+            Operation::Arithmetic(Arithmetic::Hypot(op)) => {
+                let mut scope = scope.child(); // the expansion is recorded in a child scope
+                expand_hypot(&mut scope, op.lhs, op.rhs, inst.out.unwrap()); // assumes `out` is set
+                TransformAction::Replace(scope.process([]).instructions)
+            }
+            _ => TransformAction::Ignore, // not a Hypot operation
+        }
+    }
+}
+
+#[derive(Debug)] // IR transformer that replaces `Arithmetic::Rhypot` with its expanded form.
+pub(crate) struct RhypotTransform;
+
+impl IrTransformer for RhypotTransform {
+    fn maybe_transform(&self, scope: &mut Scope, inst: &Instruction) -> TransformAction {
+        match &inst.operation {
+            Operation::Arithmetic(Arithmetic::Rhypot(op)) => {
+                let mut scope = scope.child(); // the expansion is recorded in a child scope
+                expand_rhypot(&mut scope, op.lhs, op.rhs, inst.out.unwrap()); // assumes `out` is set
+                TransformAction::Replace(scope.process([]).instructions)
+            }
+            _ => TransformAction::Ignore, // not a Rhypot operation
+        }
+    }
+}
@@ -223,7 +223,7 @@ impl<'a> Visitor<'a> {
                 self.insert_variable(out, operation);
             }
             Arithmetic::Erf(_) => {
-                unreachable!("Should have been transformed in primitive in a previous passe");
+                unreachable!("Should have been transformed in primitive in a previous phase");
             }
             Arithmetic::Exp(exp) => {
                 let value = self.get_variable(exp.input);
@@ -446,84 +446,11 @@ impl<'a> Visitor<'a> {
                     self.append_operation_with_result(arith::mulf(value, f, self.location));
                 self.insert_variable(out, result);
             }
-            Arithmetic::Hypot(hypot) => {
-                let (a, b) = self.get_binary_op_variable(hypot.lhs, hypot.rhs);
-                let abs_a = self.get_absolute_val(hypot.lhs.ty, a);
-                let abs_b = self.get_absolute_val(hypot.rhs.ty, b);
-                let zero = self.create_float_constant_from_item(hypot.lhs.ty, 0.0);
-                let one = self.create_float_constant_from_item(hypot.lhs.ty, 1.0);
-                let max =
-                    self.append_operation_with_result(arith::maxnumf(abs_a, abs_b, self.location));
-                let is_max_zero = self.append_operation_with_result(arith::cmpf(
-                    self.context,
-                    arith::CmpfPredicate::Oeq,
-                    max,
-                    zero,
-                    self.location,
-                ));
-                let max_safe = self.append_operation_with_result(arith::select(
-                    is_max_zero,
-                    one,
-                    max,
-                    self.location,
-                ));
-                let min =
-                    self.append_operation_with_result(arith::minimumf(abs_a, abs_b, self.location));
-                let t =
-                    self.append_operation_with_result(arith::divf(min, max_safe, self.location));
-                let t_square = self.append_operation_with_result(arith::mulf(t, t, self.location));
-                let t_square_plus_one =
-                    self.append_operation_with_result(arith::addf(t_square, one, self.location));
-                let square_root = self.append_operation_with_result(llvm_ods::intr_sqrt(
-                    self.context,
-                    t_square_plus_one,
-                    self.location,
-                ));
-                let result =
-                    self.append_operation_with_result(arith::mulf(max, square_root, self.location));
-
-                self.insert_variable(out, result);
+            Arithmetic::Hypot(_hypot) => {
+                unreachable!("Should have been transformed in primitive in a previous phase");
             }
-            Arithmetic::Rhypot(hypot) => {
-                let (a, b) = self.get_binary_op_variable(hypot.lhs, hypot.rhs);
-                let abs_a = self.get_absolute_val(hypot.lhs.ty, a);
-                let abs_b = self.get_absolute_val(hypot.rhs.ty, b);
-                let zero = self.create_float_constant_from_item(hypot.lhs.ty, 0.0);
-                let one = self.create_float_constant_from_item(hypot.lhs.ty, 1.0);
-                let max =
-                    self.append_operation_with_result(arith::maxnumf(abs_a, abs_b, self.location));
-                let is_max_zero = self.append_operation_with_result(arith::cmpf(
-                    self.context,
-                    arith::CmpfPredicate::Oeq,
-                    max,
-                    zero,
-                    self.location,
-                ));
-                let max_safe = self.append_operation_with_result(arith::select(
-                    is_max_zero,
-                    one,
-                    max,
-                    self.location,
-                ));
-                let min =
-                    self.append_operation_with_result(arith::minimumf(abs_a, abs_b, self.location));
-                let t =
-                    self.append_operation_with_result(arith::divf(min, max_safe, self.location));
-                let t_square = self.append_operation_with_result(arith::mulf(t, t, self.location));
-                let t_square_plus_one =
-                    self.append_operation_with_result(arith::addf(t_square, one, self.location));
-                let inverse_square_root = self.append_operation_with_result(math_ods::rsqrt(
-                    self.context,
-                    t_square_plus_one,
-                    self.location,
-                ));
-                let result = self.append_operation_with_result(arith::divf(
-                    inverse_square_root,
-                    max,
-                    self.location,
-                ));
-
-                self.insert_variable(out, result);
+            Arithmetic::Rhypot(_rhypot) => {
+                unreachable!("Should have been transformed in primitive in a previous phase");
             }
             Arithmetic::Recip(recip) => {
                 let value = self.get_variable(recip.input);
 
@@ -344,8 +344,6 @@ fn try_const_eval_arithmetic(op: &mut Arithmetic) -> Option<ConstantScalarValue>
         Arithmetic::Powi(op) => {
             const_eval_float!(op.lhs, op.rhs; num::Float::powf)
         }
-        Arithmetic::Hypot(op) => const_eval_float!(op.lhs, op.rhs; num::Float::hypot),
-        Arithmetic::Rhypot(op) => const_eval_float!(op.lhs, op.rhs; num::Float::hypot),
         Arithmetic::Modulo(op) => const_eval!(% op.lhs, op.rhs),
         Arithmetic::Remainder(op) => const_eval!(% op.lhs, op.rhs),
         Arithmetic::MulHi(op) => {
@@ -506,7 +504,11 @@ fn try_const_eval_arithmetic(op: &mut Arithmetic) -> Option<ConstantScalarValue>
                 }
             })
         }
-        Arithmetic::Erf(_) | Arithmetic::Magnitude(_) | Arithmetic::Normalize(_) => None,
+        Arithmetic::Erf(_)
+        | Arithmetic::Hypot(_)
+        | Arithmetic::Rhypot(_)
+        | Arithmetic::Magnitude(_)
+        | Arithmetic::Normalize(_) => None,
     }
 }
 
 
@@ -535,87 +535,11 @@ impl<T: SpirvTarget> SpirvCompiler<T> {
                     b.select(ty, Some(out), is_zero, even, sel1).unwrap();
                 })
             }
-            Arithmetic::Hypot(op) => {
-                self.compile_binary_op(op, out, uniform, |b, out_ty, ty, lhs, rhs, out| {
-                    let relaxed = matches!(out_ty.elem(), Elem::Relaxed);
-                    let zero = b.static_cast(ConstVal::Bit32(0), &Elem::Int(32, false), &out_ty);
-                    let one = b.static_cast(ConstVal::Bit32(1), &Elem::Int(32, false), &out_ty);
-                    let abs_a = b.id();
-                    T::f_abs(b, ty, lhs, abs_a);
-                    let abs_b = b.id();
-                    T::f_abs(b, ty, rhs, abs_b);
-                    let max = b.id();
-                    T::f_max(b, ty, abs_a, abs_b, max);
-                    let min = b.id();
-                    T::f_min(b, ty, abs_a, abs_b, min);
-                    let bool = Elem::Bool.id(b);
-                    let is_max_zero = b.f_ord_equal(bool, None, max, zero).unwrap();
-                    let max_safe = b.id();
-                    b.select(ty, Some(max_safe), is_max_zero, one, max).unwrap();
-                    let t = b.id();
-                    b.f_div(ty, Some(t), min, max_safe).unwrap();
-                    let t_fma = b.gl_fma(ty, t, t, one).unwrap();
-                    let square_root = b.id();
-                    T::sqrt(b, ty, t_fma, square_root);
-                    let ids = [
-                        abs_a,
-                        abs_b,
-                        max,
-                        is_max_zero,
-                        max_safe,
-                        t_fma,
-                        square_root,
-                        out,
-                    ];
-                    for id in ids {
-                        b.mark_uniformity(id, uniform);
-                        if relaxed {
-                            b.decorate(id, Decoration::RelaxedPrecision, []);
-                        }
-                    }
-                    b.f_mul(ty, Some(out), square_root, max).unwrap();
-                })
+            Arithmetic::Hypot(_op) => {
+                unreachable!("Replaced by transformer");
             }
-            Arithmetic::Rhypot(op) => {
-                self.compile_binary_op(op, out, uniform, |b, out_ty, ty, lhs, rhs, out| {
-                    let relaxed = matches!(out_ty.elem(), Elem::Relaxed);
-                    let zero = b.static_cast(ConstVal::Bit32(0), &Elem::Int(32, false), &out_ty);
-                    let one = b.static_cast(ConstVal::Bit32(1), &Elem::Int(32, false), &out_ty);
-                    let abs_a = b.id();
-                    T::f_abs(b, ty, lhs, abs_a);
-                    let abs_b = b.id();
-                    T::f_abs(b, ty, rhs, abs_b);
-                    let max = b.id();
-                    T::f_max(b, ty, abs_a, abs_b, max);
-                    let min = b.id();
-                    T::f_min(b, ty, abs_a, abs_b, min);
-                    let bool = Elem::Bool.id(b);
-                    let is_max_zero = b.f_ord_equal(bool, None, max, zero).unwrap();
-                    let max_safe = b.id();
-                    b.select(ty, Some(max_safe), is_max_zero, one, max).unwrap();
-                    let t = b.id();
-                    b.f_div(ty, Some(t), min, max_safe).unwrap();
-                    let t_fma = b.gl_fma(ty, t, t, one).unwrap();
-                    let inverse_square_root = b.id();
-                    T::inverse_sqrt(b, ty, t_fma, inverse_square_root);
-                    let ids = [
-                        abs_a,
-                        abs_b,
-                        max,
-                        is_max_zero,
-                        max_safe,
-                        t_fma,
-                        inverse_square_root,
-                        out,
-                    ];
-                    for id in ids {
-                        b.mark_uniformity(id, uniform);
-                        if relaxed {
-                            b.decorate(id, Decoration::RelaxedPrecision, []);
-                        }
-                    }
-                    b.f_div(ty, Some(out), inverse_square_root, max).unwrap();
-                })
+            Arithmetic::Rhypot(_op) => {
+                unreachable!("Replaced by transformer");
             }
             Arithmetic::Sqrt(op) => {
                 self.compile_unary_op_cast(op, out, uniform, |b, out_ty, ty, input, out| {
 
@@ -34,7 +34,7 @@ use crate::{
     item::Item,
     lookups::LookupTables,
     target::{GLCompute, SpirvTarget},
-    transformers::{BitwiseTransform, ErfTransform},
+    transformers::{BitwiseTransform, ErfTransform, HypotTransform, RhypotTransform},
 };
 
 pub const MAX_VECTORIZATION: u32 = 4;
@@ -215,6 +215,8 @@ impl<Target: SpirvTarget> SpirvCompiler<Target> {
         let mut opt = OptimizerBuilder::default()
             .with_transformer(ErfTransform)
             .with_transformer(BitwiseTransform)
+            .with_transformer(HypotTransform)
+            .with_transformer(RhypotTransform)
             .with_processor(CheckedIoProcessor::new(self.mode))
             .with_processor(UnrollProcessor::new(MAX_VECTORIZATION))
             .with_processor(SaturatingArithmeticProcessor::new(true))
Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,3 @@`
`1`	`1`	`pub mod erf_transform;`
`2`	`2`	`pub mod shared_memories;`
	`3`	`+pub mod trigonometries_transform;`
Original file line number	Diff line number	Diff line change
`@@ -344,8 +344,6 @@ fn try_const_eval_arithmetic(op: &mut Arithmetic) -> Option<ConstantScalarValue>`
`344`	`344`	`Arithmetic::Powi(op) => {`
`345`	`345`	`const_eval_float!(op.lhs, op.rhs; num::Float::powf)`
`346`	`346`	`}`
`347`		`- Arithmetic::Hypot(op) => const_eval_float!(op.lhs, op.rhs; num::Float::hypot),`
`348`		`- Arithmetic::Rhypot(op) => const_eval_float!(op.lhs, op.rhs; num::Float::hypot),`
`349`	`347`	`Arithmetic::Modulo(op) => const_eval!(% op.lhs, op.rhs),`
`350`	`348`	`Arithmetic::Remainder(op) => const_eval!(% op.lhs, op.rhs),`
`351`	`349`	`Arithmetic::MulHi(op) => {`
`@@ -506,7 +504,11 @@ fn try_const_eval_arithmetic(op: &mut Arithmetic) -> Option<ConstantScalarValue>`
`506`	`504`	`}`
`507`	`505`	`})`
`508`	`506`	`}`
`509`		`- Arithmetic::Erf(_) \| Arithmetic::Magnitude(_) \| Arithmetic::Normalize(_) => None,`
	`507`	`+ Arithmetic::Erf(_)`
	`508`	`+ \| Arithmetic::Hypot(_)`
	`509`	`+ \| Arithmetic::Rhypot(_)`
	`510`	`+ \| Arithmetic::Magnitude(_)`
	`511`	`+ \| Arithmetic::Normalize(_) => None,`
`510`	`512`	`}`
`511`	`513`	`}`
`512`	`514`