[SYCL] Fix apparently incorrect bfloat16 conversions (#20243)

aelovikov-intel · web-flow · commit b57a01a7649d · 2025-09-30T09:13:32.000+02:00
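The returns changed below looked wrong (raw integer bit patterns were
returned from a function whose return type is `bfloat16`), but I don't
have any tests to confirm it. We can either merge as-is or create an
issue for the original author(s) to follow up.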

It seems that the `bfloat16` APIs don't need any changes, since C++23's
`std::bfloat16_t` behaves in a similar way (although it's a fundamental
type instead of a class type).
diff --git a/sycl/include/sycl/ext/oneapi/bfloat16.hpp b/sycl/include/sycl/ext/oneapi/bfloat16.hpp
@@ -407,25 +407,25 @@ inline bfloat16 getBFloat16FromDoubleWithRTE(const double &d) {
   // handling +/-infinity and NAN for double input
   if (fp64_exp == 0x7FF) {
     if (!fp64_mant)
-      return bf16_sign ? 0xFF80 : 0x7F80;
+      return bit_cast<bfloat16, uint16_t>(bf16_sign ? 0xFF80 : 0x7F80);
 
     // returns a quiet NaN
-    return 0x7FC0;
+    return bit_cast<bfloat16, uint16_t>(0x7FC0);
   }
 
   // Subnormal double precision is converted to 0
   if (fp64_exp == 0)
-    return bf16_sign ? 0x8000 : 0x0;
+    return bit_cast<bfloat16, uint16_t>(bf16_sign ? 0x8000 : 0x0);
 
   fp64_exp -= 1023;
 
   // handling overflow, convert to +/-infinity
   if (static_cast<int16_t>(fp64_exp) > 127)
-    return bf16_sign ? 0xFF80 : 0x7F80;
+    return bit_cast<bfloat16, uint16_t>(bf16_sign ? 0xFF80 : 0x7F80);
 
   // handling underflow
   if (static_cast<int16_t>(fp64_exp) < -133)
-    return bf16_sign ? 0x8000 : 0x0;
+    return bit_cast<bfloat16, uint16_t>(bf16_sign ? 0x8000 : 0x0);
 
   //-133 <= fp64_exp <= 127, 1.signicand * 2^fp64_exp
   // For these numbers, they are NOT subnormal double-precision numbers but
@@ -444,7 +444,8 @@ inline bfloat16 getBFloat16FromDoubleWithRTE(const double &d) {
       bf16_mant = 0;
       fp64_exp = 1;
     }
-    return (bf16_sign << 15) | (fp64_exp << 7) | bf16_mant;
+    return bit_cast<bfloat16, uint16_t>((bf16_sign << 15) | (fp64_exp << 7) |
+                                        bf16_mant);
   }
 
   // For normal value, discard 45 bits from mantissa
@@ -462,7 +463,8 @@ inline bfloat16 getBFloat16FromDoubleWithRTE(const double &d) {
   }
   fp64_exp += 127;
 
-  return (bf16_sign << 15) | (fp64_exp << 7) | bf16_mant;
+  return bit_cast<bfloat16, uint16_t>((bf16_sign << 15) | (fp64_exp << 7) |
+                                      bf16_mant);
 }
 
 // Function to get the most significant bit position of a number.