Backport 28602f3d3ec15b5241a33a46ce43349e6300395d

duke · duke · commit 9f83404cb4ca · 2025-08-27T13:01:47.000Z
diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
@@ -2391,26 +2391,15 @@ static void float_to_float16_slow_path(C2_MacroAssembler& masm, C2GeneralStub<Re
   Register tmp = stub.data<2>();
   __ bind(stub.entry());
 
-  __ fmv_x_w(dst, src);
-
-  // preserve the payloads of non-canonical NaNs.
-  __ srai(dst, dst, 13);
-  // preserve the sign bit.
-  __ srai(tmp, dst, 13);
-  __ slli(tmp, tmp, 10);
-  __ mv(t0, 0x3ff);
-  __ orr(tmp, tmp, t0);
-
-  // get the result by merging sign bit and payloads of preserved non-canonical NaNs.
-  __ andr(dst, dst, tmp);
+  __ float_to_float16_NaN(dst, src, t0, tmp);
 
   __ j(stub.continuation());
 #undef __
 }
 
 // j.l.Float.floatToFloat16
 void C2_MacroAssembler::float_to_float16(Register dst, FloatRegister src, FloatRegister ftmp, Register xtmp) {
-  auto stub = C2CodeStub::make<Register, FloatRegister, Register>(dst, src, xtmp, 130, float_to_float16_slow_path);
+  auto stub = C2CodeStub::make<Register, FloatRegister, Register>(dst, src, xtmp, 64, float_to_float16_slow_path);
 
   // On riscv, NaN needs a special process as fcvt does not work in that case.
 
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
@@ -5954,6 +5954,62 @@ void MacroAssembler::java_round_double(Register dst, FloatRegister src, FloatReg
   bind(done);
 }
 
+// Slow-path helper for the float -> float16 conversion of a NaN: leaves the binary16 bits in the low 16 bits of dst (tmp1/tmp2 are scratch).
+void MacroAssembler::float_to_float16_NaN(Register dst, FloatRegister src,
+                                          Register tmp1, Register tmp2) {
+  fmv_x_w(dst, src);  // raw float bits; dst is re-read below after tmp1/tmp2 are written, so it must not alias them
+
+  //  Float (32 bits)
+  //    Bit:     31        30 to 23          22 to 0
+  //          +---+------------------+-----------------------------+
+  //          | S |     Exponent     |      Mantissa (Fraction)    |
+  //          +---+------------------+-----------------------------+
+  //          1 bit       8 bits                  23 bits
+  //
+  //  Float (16 bits)
+  //    Bit:    15        14 to 10         9 to 0
+  //          +---+----------------+------------------+
+  //          | S |    Exponent    |     Mantissa     |
+  //          +---+----------------+------------------+
+  //          1 bit      5 bits          10 bits
+  const int fp_sign_bits = 1;
+  const int fp32_bits = 32;
+  const int fp32_exponent_bits = 8;
+  const int fp32_mantissa_1st_part_bits = 10;  // fp32 mantissa bits 22..13
+  const int fp32_mantissa_2nd_part_bits = 9;   // fp32 mantissa bits 12..4
+  const int fp32_mantissa_3rd_part_bits = 4;   // fp32 mantissa bits 3..0
+  const int fp16_exponent_bits = 5;
+  const int fp16_mantissa_bits = 10;
+
+  // Keep the sign bit and the top 5 exponent bits (fp32 bits 31..26) and move them to fp16 bits 15..10; bits 9..0 are left zero.
+  srai(tmp2, dst, fp32_bits - fp_sign_bits - fp16_exponent_bits);
+  slli(tmp2, tmp2, fp16_mantissa_bits);
+
+  // Preserve high order bit of float NaN in the
+  // binary16 result NaN (tenth bit); OR in remaining
+  // bits into lower 9 bits of binary 16 significand.
+  //   | (doppel & 0x007f_e000) >> 13 // 10 bits
+  //   | (doppel & 0x0000_1ff0) >> 4  //  9 bits
+  //   | (doppel & 0x0000_000f));     //  4 bits
+  //
+  // Check j.l.Float.floatToFloat16 for more information.
+  // 10 bits: fp32 mantissa bits 22..13 become fp16 mantissa bits 9..0.
+  int left_shift = fp_sign_bits + fp32_exponent_bits + 32;  // the extra 32 also shifts out the upper half of the 64-bit register
+  int right_shift = left_shift + fp32_mantissa_2nd_part_bits + fp32_mantissa_3rd_part_bits;
+  slli(tmp1, dst, left_shift);
+  srli(tmp1, tmp1, right_shift);
+  orr(tmp2, tmp2, tmp1);
+  // 9 bits: fp32 mantissa bits 12..4 are OR-ed into fp16 mantissa bits 8..0.
+  left_shift += fp32_mantissa_1st_part_bits;
+  right_shift = left_shift + fp32_mantissa_3rd_part_bits;
+  slli(tmp1, dst, left_shift);
+  srli(tmp1, tmp1, right_shift);
+  orr(tmp2, tmp2, tmp1);
+  // 4 bits: fp32 mantissa bits 3..0 are OR-ed into fp16 mantissa bits 3..0.
+  andi(tmp1, dst, 0xf);
+  orr(dst, tmp2, tmp1);
+}
+
 #define FCVT_SAFE(FLOATCVT, FLOATSIG)                                                     \
 void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) {     \
   Label done;                                                                             \
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
@@ -1431,6 +1431,9 @@ class MacroAssembler: public Assembler {
   void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp);
   void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp);
 
+  // Helper routine processing the slow path of NaN when converting float to float16
+  void float_to_float16_NaN(Register dst, FloatRegister src, Register tmp1, Register tmp2);
+
   // vector load/store unit-stride instructions
   void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
     switch (sew) {
diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
@@ -8545,7 +8545,7 @@ instruct convF2HF_reg_reg(iRegINoSp dst, fRegF src, fRegF ftmp, iRegINoSp xtmp)
 instruct reinterpretS2HF(fRegF dst, iRegI src)
 %{
   match(Set dst (ReinterpretS2HF src));
-  format %{ "fmv.h.x $dst, $src" %}
+  format %{ "fmv.h.x $dst, $src\t# reinterpretS2HF" %}
   ins_encode %{
     __ fmv_h_x($dst$$FloatRegister, $src$$Register);
   %}
@@ -8565,7 +8565,7 @@ instruct convF2HFAndS2HF(fRegF dst, fRegF src)
 instruct reinterpretHF2S(iRegINoSp dst, fRegF src)
 %{
   match(Set dst (ReinterpretHF2S src));
-  format %{ "fmv.x.h $dst, $src" %}
+  format %{ "fmv.x.h $dst, $src\t# reinterpretHF2S" %}
   ins_encode %{
     __ fmv_x_h($dst$$Register, $src$$FloatRegister);
   %}
diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
@@ -6354,18 +6354,8 @@ class StubGenerator: public StubCodeGenerator {
     __ ret();
 
     __ bind(NaN_SLOW);
-    __ fmv_x_w(dst, src);
-
-    // preserve the payloads of non-canonical NaNs.
-    __ srai(dst, dst, 13);
-    // preserve the sign bit.
-    __ srai(t1, dst, 13);
-    __ slli(t1, t1, 10);
-    __ mv(t0, 0x3ff);
-    __ orr(t1, t1, t0);
-
-    // get the result by merging sign bit and payloads of preserved non-canonical NaNs.
-    __ andr(dst, dst, t1);
+
+    __ float_to_float16_NaN(dst, src, t0, t1);
 
     __ ret();
     return entry;
diff --git a/test/hotspot/jtreg/compiler/intrinsics/float16/Binary16ConversionNaN_2.java b/test/hotspot/jtreg/compiler/intrinsics/float16/Binary16ConversionNaN_2.java
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2025, Rivos Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8365206
+ * @summary Verify NaN sign and significand bits are preserved across conversions,
+ *          float -> float16 -> float
+ * @requires (os.arch == "riscv64" & vm.cpu.features ~= ".*zfh.*")
+ * @requires vm.compiler1.enabled & vm.compiler2.enabled
+ * @requires vm.compMode != "Xcomp"
+ * @library /test/lib /
+ *
+ * @build jdk.test.whitebox.WhiteBox
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI
+ *                   -Xmixed -XX:-BackgroundCompilation -XX:-UseOnStackReplacement
+ *                   -XX:CompileThresholdScaling=1000.0 Binary16ConversionNaN_2
+ */
+
+/*
+ * The behavior tested below is an implementation property not
+ * required by the specification. It would be acceptable for this
+ * information to not be preserved (as long as a NaN is returned) if,
+ * say, an intrinsified version using native hardware instructions
+ * behaved differently.
+ *
+ * If that is the case, this test should be modified to disable
+ * intrinsics or to otherwise not run on platforms with a differently
+ * behaving intrinsic.
+ */
+
+import compiler.whitebox.CompilerWhiteBoxTest;
+import jdk.test.whitebox.WhiteBox;
+import java.lang.reflect.Method;
+import java.util.Random;
+
+public class Binary16ConversionNaN_2 {
+
+    private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox();
+
+    /*
+     * Put float NaN values with varied payloads through a float -> float16 -> float
+     * round trip and make sure the result is still a NaN with the same sign bit.
+     */
+    public static void main(String... argv) throws NoSuchMethodException {
+        int errors = 0;
+        final int NAN_EXPONENT = 0x7f80_0000;
+        final int SIGN_BIT     = 0x8000_0000;
+
+        // First, run with the interpreter only to collect "gold" data.
+        // Flags -Xmixed -XX:CompileThresholdScaling=1000.0 are used
+        // to prevent compilation during this phase.
+        float[] pVal = new float[1024];
+        float[] pRes = new float[1024];
+        float[] nVal = new float[1024];
+        float[] nRes = new float[1024];
+
+        Random rand = new Random(); // NOTE(review): unseeded, so the tested payloads differ from run to run
+
+        // A NaN has a nonzero significand
+        for (int i = 1; i <= 0x3ff; i++) {
+            int shift = rand.nextInt(13+1); // 0..13 keeps the 10-bit pattern inside the 23-bit significand
+            int binaryNaN = (NAN_EXPONENT | (i << shift));
+            assert isNaN(binaryNaN);
+            // the payloads of non-canonical NaNs are preserved.
+            float f1 = Float.intBitsToFloat(binaryNaN);
+            float f2 = testRoundTrip(f1);
+            errors  += verify(f1, f2);
+            pVal[i] = f1;
+            pRes[i] = f2;
+
+            int binaryNegNaN = (SIGN_BIT | binaryNaN);
+            float f3 = Float.intBitsToFloat(binaryNegNaN);
+            float f4 = testRoundTrip(f3);
+            errors  += verify(f3, f4);
+            nVal[i] = f3;
+            nRes[i] = f4;
+        }
+        if (errors > 0) { // Exit if Interpreter failed
+            throw new RuntimeException(errors + " errors");
+        }
+
+        Method test_method = Binary16ConversionNaN_2.class.getDeclaredMethod("testRoundTrip", float.class);
+
+        // Compile with C1 and compare results
+        WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_SIMPLE);
+        if (!WHITE_BOX.isMethodCompiled(test_method)) {
+            throw new RuntimeException("test is not compiled by C1");
+        }
+        for (int i = 1; i <= 0x3ff; i++) {
+            float f1 = testRoundTrip(pVal[i]);
+            errors  += verifyCompiler(pRes[i], f1, "C1");
+            float f2 = testRoundTrip(nVal[i]);
+            errors  += verifyCompiler(nRes[i], f2, "C1");
+        }
+
+        WHITE_BOX.deoptimizeMethod(test_method);
+
+        // Compile with C2 and compare results
+        WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION);
+        if (!WHITE_BOX.isMethodCompiled(test_method)) {
+            throw new RuntimeException("test is not compiled by C2");
+        }
+        for (int i = 1; i <= 0x3ff; i++) {
+            float f1 = testRoundTrip(pVal[i]);
+            errors  += verifyCompiler(pRes[i], f1, "C2");
+            float f2 = testRoundTrip(nVal[i]);
+            errors  += verifyCompiler(nRes[i], f2, "C2");
+        }
+
+        if (errors > 0) {
+            throw new RuntimeException(errors + " errors");
+        }
+    }
+
+    private static boolean isNaN(int binary) {
+        return ((binary & 0x7f80_0000) == 0x7f80_0000) // Max exponent and...
+            && ((binary & 0x007f_ffff) != 0 );         // significand nonzero.
+    }
+
+    private static float testRoundTrip(float f) { // float -> float16 -> float; this is the method compiled via WhiteBox
+        short s = Float.floatToFloat16(f);
+        return Float.float16ToFloat(s);
+    }
+
+    private static int verify(float f1, float f2) { // returns 1 (and logs) if f2 is not a NaN carrying f1's sign bit, else 0
+        int errors = 0;
+        int i1 = Float.floatToRawIntBits(f1);
+        int i2 = Float.floatToRawIntBits(f2);
+        assert Float.isNaN(f1);
+        if (!Float.isNaN(f2) ||
+            ((i1 & 0x8000_0000) != (i2 & 0x8000_0000))) {
+            errors++;
+            System.out.println("Roundtrip failure on NaN value " +
+                               Integer.toHexString(i1) +
+                               "\t got back " + Integer.toHexString(i2));
+        }
+        return errors;
+    }
+
+    private static int verifyCompiler(float f1, float f2, String name) { // same check as verify(); f1 is the interpreter result, name labels the compiler in the log
+        int errors = 0;
+        int i1 = Float.floatToRawIntBits(f1);
+        int i2 = Float.floatToRawIntBits(f2);
+        assert Float.isNaN(f1);
+        if (!Float.isNaN(f2) ||
+            ((i1 & 0x8000_0000) != (i2 & 0x8000_0000))) {
+            errors++;
+            System.out.println("Roundtrip failure on NaN value " +
+                               Integer.toHexString(i1) +
+                               "\t got back " + Integer.toHexString(i2) +
+                               "\t from " + name + " code");
+        }
+        return errors;
+    }
+}