Add __addhf3, __subhf3, __mulhf3, and hf comparisons

tgross35 · tgross35 · commit b967dcaf90e0 · 2025-08-09T06:53:45.000-05:00
LLVM does not currently emit these, but it is being discussed as an
option on platforms where `f32` is not hardware supported. These three
intrinsics are trivial, as are comparisons, so add them here.
diff --git a/builtins-test/tests/addsub.rs b/builtins-test/tests/addsub.rs
@@ -1,4 +1,5 @@
 #![allow(unused_macros)]
+#![cfg_attr(f16_enabled, feature(f16))]
 #![cfg_attr(f128_enabled, feature(f128))]
 
 use builtins_test::*;
@@ -115,28 +116,25 @@ macro_rules! float_sum {
 mod float_addsub {
     use super::*;
 
+    #[cfg(f16_enabled)]
+    float_sum! {
+        f16, __addhf3, __subhf3, Half, all();
+    }
+
     float_sum! {
         f32, __addsf3, __subsf3, Single, all();
         f64, __adddf3, __subdf3, Double, all();
     }
-}
-
-#[cfg(f128_enabled)]
-#[cfg(not(x86_no_sse))]
-#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
-mod float_addsub_f128 {
-    use super::*;
 
+    #[cfg(f128_enabled)]
+    #[cfg(not(x86_no_sse))]
+    #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
     float_sum! {
         f128, __addtf3, __subtf3, Quad, not(feature = "no-sys-f128");
     }
-}
-
-#[cfg(f128_enabled)]
-#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
-mod float_addsub_f128_ppc {
-    use super::*;
 
+    #[cfg(f128_enabled)]
+    #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
     float_sum! {
         f128, __addkf3, __subkf3, Quad, not(feature = "no-sys-f128");
     }
diff --git a/builtins-test/tests/cmp.rs b/builtins-test/tests/cmp.rs
@@ -1,5 +1,6 @@
 #![allow(unused_macros)]
 #![allow(unreachable_code)]
+#![cfg_attr(f16_enabled, feature(f16))]
 #![cfg_attr(f128_enabled, feature(f128))]
 
 use builtins_test::*;
@@ -51,6 +52,25 @@ mod float_comparisons {
         };
     }
 
+    #[test]
+    fn cmp_f16() {
+        use compiler_builtins::float::cmp::{
+            __eqhf2, __gehf2, __gthf2, __lehf2, __lthf2, __nehf2, __unordhf2,
+        };
+
+        fuzz_float_2(N, |x: f16, y: f16| {
+            assert_eq!(__unordhf2(x, y) != 0, x.is_nan() || y.is_nan());
+            cmp!(f16, x, y, Half, all(),
+                1, __lthf2;
+                1, __lehf2;
+                1, __eqhf2;
+                -1, __gehf2;
+                -1, __gthf2;
+                1, __nehf2;
+            );
+        });
+    }
+
     #[test]
     fn cmp_f32() {
         use compiler_builtins::float::cmp::{
diff --git a/builtins-test/tests/mul.rs b/builtins-test/tests/mul.rs
@@ -1,5 +1,6 @@
-#![allow(unused_macros)]
+#![cfg_attr(f16_enabled, feature(f16))]
 #![cfg_attr(f128_enabled, feature(f128))]
+#![allow(unused_macros)]
 
 use builtins_test::*;
 
@@ -117,6 +118,11 @@ macro_rules! float_mul {
 mod float_mul {
     use super::*;
 
+    #[cfg(f16_enabled)]
+    float_mul! {
+        f16, __mulhf3, Half, all();
+    }
+
     // FIXME(#616): Stop ignoring arches that don't have native support once fix for builtins is in
     // nightly.
     float_mul! {
diff --git a/compiler-builtins/src/float/add.rs b/compiler-builtins/src/float/add.rs
@@ -191,6 +191,11 @@ where
 }
 
 intrinsics! {
+    #[cfg(f16_enabled)]
+    pub extern "C" fn __addhf3(a: f16, b: f16) -> f16 {
+        add(a, b)
+    }
+
     #[aapcs_on_arm]
     #[arm_aeabi_alias = __aeabi_fadd]
     pub extern "C" fn __addsf3(a: f32, b: f32) -> f32 {
diff --git a/compiler-builtins/src/float/cmp.rs b/compiler-builtins/src/float/cmp.rs
@@ -115,6 +115,37 @@ fn unord<F: Float>(a: F, b: F) -> bool {
     a_abs > inf_rep || b_abs > inf_rep
 }
 
+#[cfg(f16_enabled)]
+intrinsics! {
+    pub extern "C" fn __lehf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
+        cmp(a, b).to_le_abi()
+    }
+
+    pub extern "C" fn __gehf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
+        cmp(a, b).to_ge_abi()
+    }
+
+    pub extern "C" fn __unordhf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
+        unord(a, b) as crate::float::cmp::CmpResult
+    }
+
+    pub extern "C" fn __eqhf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
+        cmp(a, b).to_le_abi()
+    }
+
+    pub extern "C" fn __lthf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
+        cmp(a, b).to_le_abi()
+    }
+
+    pub extern "C" fn __nehf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
+        cmp(a, b).to_le_abi()
+    }
+
+    pub extern "C" fn __gthf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
+        cmp(a, b).to_ge_abi()
+    }
+}
+
 intrinsics! {
     pub extern "C" fn __lesf2(a: f32, b: f32) -> crate::float::cmp::CmpResult {
         cmp(a, b).to_le_abi()
diff --git a/compiler-builtins/src/float/mul.rs b/compiler-builtins/src/float/mul.rs
@@ -180,6 +180,11 @@ where
 }
 
 intrinsics! {
+    #[cfg(f16_enabled)]
+    pub extern "C" fn __mulhf3(a: f16, b: f16) -> f16 {
+        mul(a, b)
+    }
+
     #[aapcs_on_arm]
     #[arm_aeabi_alias = __aeabi_fmul]
     pub extern "C" fn __mulsf3(a: f32, b: f32) -> f32 {
diff --git a/compiler-builtins/src/float/sub.rs b/compiler-builtins/src/float/sub.rs
@@ -1,6 +1,11 @@
 use crate::float::Float;
 
 intrinsics! {
+    #[cfg(f16_enabled)]
+    pub extern "C" fn __subhf3(a: f16, b: f16) -> f16 {
+        crate::float::add::__addhf3(a, f16::from_bits(b.to_bits() ^ f16::SIGN_MASK))
+    }
+
     #[arm_aeabi_alias = __aeabi_fsub]
     pub extern "C" fn __subsf3(a: f32, b: f32) -> f32 {
         crate::float::add::__addsf3(a, f32::from_bits(b.to_bits() ^ f32::SIGN_MASK))