|
| 1 | +use core::{f32, f64}; |
| 2 | + |
| 3 | +use crate as cubecl; |
| 4 | +use cubecl_ir::{ |
| 5 | + Allocator, Comparison, ElemType, ExpandElement, FloatKind, Instruction, Operation, Processor, |
| 6 | + Scope, ScopeProcessing, UIntKind, Variable, |
| 7 | +}; |
| 8 | +use half::{bf16, f16}; |
| 9 | + |
| 10 | +use crate::prelude::*; |
| 11 | + |
/// IR processor that rewrites `IsNan` / `IsInf` comparison instructions into
/// bit-manipulation polyfills (see [`is_nan`] and [`is_inf`] below).
#[derive(Debug, Default)]
pub struct PredicateProcessor;

| 15 | +impl Processor for PredicateProcessor { |
| 16 | + fn transform( |
| 17 | + &self, |
| 18 | + mut processing: cubecl_ir::ScopeProcessing, |
| 19 | + allocator: Allocator, |
| 20 | + ) -> cubecl_ir::ScopeProcessing { |
| 21 | + let mut instructions = Vec::new(); |
| 22 | + core::mem::swap(&mut processing.instructions, &mut instructions); |
| 23 | + |
| 24 | + for instruction in instructions { |
| 25 | + if let Operation::Comparison(comparison) = &instruction.operation { |
| 26 | + match comparison { |
| 27 | + Comparison::IsNan(op) => { |
| 28 | + run_polyfill( |
| 29 | + &mut processing, |
| 30 | + op.input, |
| 31 | + instruction.out(), |
| 32 | + &allocator, |
| 33 | + is_nan::expand::<FloatExpand<0>, IntExpand<1>>, |
| 34 | + ); |
| 35 | + continue; |
| 36 | + } |
| 37 | + Comparison::IsInf(op) => { |
| 38 | + run_polyfill( |
| 39 | + &mut processing, |
| 40 | + op.input, |
| 41 | + instruction.out(), |
| 42 | + &allocator, |
| 43 | + is_inf::expand::<FloatExpand<0>, IntExpand<1>>, |
| 44 | + ); |
| 45 | + continue; |
| 46 | + } |
| 47 | + _ => {} |
| 48 | + } |
| 49 | + } |
| 50 | + processing.instructions.push(instruction); |
| 51 | + } |
| 52 | + processing |
| 53 | + } |
| 54 | +} |
| 55 | + |
/// Expands `polyfill` into a fresh scope and splices the generated
/// instructions into `processing`, finishing with a `Copy` of the polyfill's
/// result into `out`.
///
/// `input` must be a float variable — panics otherwise. The polyfill receives
/// the explicit mantissa bit count and exponent bit count of the input's
/// float kind; `FloatExpand<0>` is bound to the input's storage type and
/// `IntExpand<1>` to the unsigned integer type of the same bit width.
fn run_polyfill<T: CubePrimitive, O: CubePrimitive>(
    processing: &mut ScopeProcessing,
    input: Variable,
    out: Variable,
    allocator: &Allocator,
    mut polyfill: impl FnMut(&mut Scope, ExpandElementTyped<T>, u32, u32) -> ExpandElementTyped<O>,
) {
    let input = ExpandElement::Plain(input);
    // Expand into a detached root scope that shares the caller's allocator, so
    // variables created by the polyfill don't collide with existing ones.
    let mut scope = Scope::root(false).with_allocator(allocator.clone());
    scope.register_type::<FloatExpand<0>>(input.storage_type());

    let out_poly = if let ElemType::Float(kind) = input.elem_type() {
        // (unsigned type of same width, total bit width, explicit mantissa bits).
        // MANTISSA_DIGITS counts the implicit leading bit, hence the `- 1`.
        let (unsigned_ty, bit_width, mantissa_bits) = match kind {
            FloatKind::F64 => (
                UIntKind::U64,
                f64::size_bits().unwrap(),
                f64::MANTISSA_DIGITS - 1,
            ),
            FloatKind::F32 => (
                UIntKind::U32,
                f32::size_bits().unwrap(),
                f32::MANTISSA_DIGITS - 1,
            ),
            FloatKind::F16 => (
                UIntKind::U16,
                f16::size_bits().unwrap(),
                f16::MANTISSA_DIGITS - 1,
            ),
            FloatKind::BF16 => (
                UIntKind::U16,
                bf16::size_bits().unwrap(),
                bf16::MANTISSA_DIGITS - 1,
            ),
            // NOTE(review): assumes no other FloatKind variant can reach this
            // polyfill — confirm upstream guarantees, otherwise this panics.
            _ => unreachable!(),
        };
        scope.register_type::<IntExpand<1>>(ElemType::UInt(unsigned_ty).into());

        // Exponent width = total bits minus mantissa bits minus the sign bit.
        let exp_bits = bit_width as u32 - mantissa_bits - 1;

        polyfill(&mut scope, input.into(), mantissa_bits, exp_bits).expand
    } else {
        panic!("Should be float")
    };

    // Flush the polyfill's scope and merge its instructions/variables into the
    // caller's processing.
    let tmp_processing = scope.process([]);

    processing.instructions.extend(tmp_processing.instructions);
    processing.variables.extend(tmp_processing.variables);

    // Route the polyfill's boolean result into the original instruction's output.
    processing
        .instructions
        .push(Instruction::new(Operation::Copy(*out_poly), out));
}
| 109 | + |
/// Polyfill for `IsNan` on a line of floats, via integer bit manipulation.
///
/// Per IEEE 754, a value is NaN iff its exponent bits are all ones and its
/// mantissa is non-zero — equivalently, after clearing the sign bit, its bit
/// pattern is strictly greater than that of +infinity.
#[cube]
fn is_nan<F: Float, U: Int>(
    x: Line<F>,
    #[comptime] mantissa_bits: u32,
    #[comptime] exp_bits: u32,
) -> Line<bool> {
    // Need to mark as u64 otherwise it is coerced into i32 which does not fit the values for f64
    // Bit pattern of +infinity: all exponent bits set, mantissa zero.
    let inf_bits = comptime![((1u64 << exp_bits as u64) - 1u64) << mantissa_bits as u64];
    // Mask that clears the sign bit (keeps exponent + mantissa).
    let abs_mask = comptime![(1u64 << (exp_bits as u64 + mantissa_bits as u64)) - 1u64];

    // Reinterpret the float bits as same-width unsigned integers.
    let bits: Line<U> = Line::<U>::reinterpret(x);

    let abs_bits = bits & Line::new(U::cast_from(abs_mask));

    // NaN iff the absolute bit pattern exceeds that of +infinity.
    abs_bits.greater_than(Line::new(U::cast_from(inf_bits)))
}
| 126 | + |
/// Polyfill for `IsInf` on a line of floats, via integer bit manipulation.
///
/// Same trick as the NaN detection above (IEEE 754), but infinity requires the
/// mantissa to be exactly zero, so the sign-cleared bit pattern must *equal*
/// that of +infinity rather than exceed it.
#[cube]
fn is_inf<F: Float, U: Int>(
    x: Line<F>,
    #[comptime] mantissa_bits: u32,
    #[comptime] exp_bits: u32,
) -> Line<bool> {
    // Need to mark as u64 otherwise it is coerced into i32 which does not fit the values for f64
    // Bit pattern of +infinity: all exponent bits set, mantissa zero.
    let inf_bits = comptime![((1u64 << exp_bits as u64) - 1u64) << mantissa_bits as u64];
    // Mask that clears the sign bit (keeps exponent + mantissa).
    let abs_mask = comptime![(1u64 << (exp_bits as u64 + mantissa_bits as u64)) - 1u64];

    // Reinterpret the float bits as same-width unsigned integers.
    let bits: Line<U> = Line::<U>::reinterpret(x);

    let abs_bits = bits & Line::new(U::cast_from(abs_mask));

    // ±infinity iff the absolute bit pattern equals that of +infinity.
    abs_bits.equal(Line::new(U::cast_from(inf_bits)))
}
0 commit comments