Skip to content

Commit 9f83404

Browse files
author
duke
committed
Backport 28602f3d3ec15b5241a33a46ce43349e6300395d
1 parent eead543 commit 9f83404

File tree

6 files changed

+243
-27
lines changed

6 files changed

+243
-27
lines changed

src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2391,26 +2391,15 @@ static void float_to_float16_slow_path(C2_MacroAssembler& masm, C2GeneralStub<Re
23912391
Register tmp = stub.data<2>();
23922392
__ bind(stub.entry());
23932393

2394-
__ fmv_x_w(dst, src);
2395-
2396-
// preserve the payloads of non-canonical NaNs.
2397-
__ srai(dst, dst, 13);
2398-
// preserve the sign bit.
2399-
__ srai(tmp, dst, 13);
2400-
__ slli(tmp, tmp, 10);
2401-
__ mv(t0, 0x3ff);
2402-
__ orr(tmp, tmp, t0);
2403-
2404-
// get the result by merging sign bit and payloads of preserved non-canonical NaNs.
2405-
__ andr(dst, dst, tmp);
2394+
__ float_to_float16_NaN(dst, src, t0, tmp);
24062395

24072396
__ j(stub.continuation());
24082397
#undef __
24092398
}
24102399

24112400
// j.l.Float.floatToFloat16
24122401
void C2_MacroAssembler::float_to_float16(Register dst, FloatRegister src, FloatRegister ftmp, Register xtmp) {
2413-
auto stub = C2CodeStub::make<Register, FloatRegister, Register>(dst, src, xtmp, 130, float_to_float16_slow_path);
2402+
auto stub = C2CodeStub::make<Register, FloatRegister, Register>(dst, src, xtmp, 64, float_to_float16_slow_path);
24142403

24152404
// On riscv, NaN needs a special process as fcvt does not work in that case.
24162405

src/hotspot/cpu/riscv/macroAssembler_riscv.cpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5954,6 +5954,62 @@ void MacroAssembler::java_round_double(Register dst, FloatRegister src, FloatReg
59545954
bind(done);
59555955
}
59565956

5957+
// Helper routine processing the slow path of NaN when converting float to float16
5958+
void MacroAssembler::float_to_float16_NaN(Register dst, FloatRegister src,
5959+
Register tmp1, Register tmp2) {
5960+
fmv_x_w(dst, src);
5961+
5962+
// Float (32 bits)
5963+
// Bit: 31 30 to 23 22 to 0
5964+
// +---+------------------+-----------------------------+
5965+
// | S | Exponent | Mantissa (Fraction) |
5966+
// +---+------------------+-----------------------------+
5967+
// 1 bit 8 bits 23 bits
5968+
//
5969+
// Float (16 bits)
5970+
// Bit: 15 14 to 10 9 to 0
5971+
// +---+----------------+------------------+
5972+
// | S | Exponent | Mantissa |
5973+
// +---+----------------+------------------+
5974+
// 1 bit 5 bits 10 bits
5975+
const int fp_sign_bits = 1;
5976+
const int fp32_bits = 32;
5977+
const int fp32_exponent_bits = 8;
5978+
const int fp32_mantissa_1st_part_bits = 10;
5979+
const int fp32_mantissa_2nd_part_bits = 9;
5980+
const int fp32_mantissa_3rd_part_bits = 4;
5981+
const int fp16_exponent_bits = 5;
5982+
const int fp16_mantissa_bits = 10;
5983+
5984+
// preserve the sign bit and the upper 5 exponent bits (the fp16 exponent), clear mantissa.
5985+
srai(tmp2, dst, fp32_bits - fp_sign_bits - fp16_exponent_bits);
5986+
slli(tmp2, tmp2, fp16_mantissa_bits);
5987+
5988+
// Preserve high order bit of float NaN in the
5989+
// binary16 result NaN (tenth bit); OR in remaining
5990+
// bits into lower 9 bits of binary 16 significand.
5991+
// | (doppel & 0x007f_e000) >> 13 // 10 bits
5992+
// | (doppel & 0x0000_1ff0) >> 4 // 9 bits
5993+
// | (doppel & 0x0000_000f)); // 4 bits
5994+
//
5995+
// Check j.l.Float.floatToFloat16 for more information.
5996+
// 10 bits
5997+
int left_shift = fp_sign_bits + fp32_exponent_bits + 32;
5998+
int right_shift = left_shift + fp32_mantissa_2nd_part_bits + fp32_mantissa_3rd_part_bits;
5999+
slli(tmp1, dst, left_shift);
6000+
srli(tmp1, tmp1, right_shift);
6001+
orr(tmp2, tmp2, tmp1);
6002+
// 9 bits
6003+
left_shift += fp32_mantissa_1st_part_bits;
6004+
right_shift = left_shift + fp32_mantissa_3rd_part_bits;
6005+
slli(tmp1, dst, left_shift);
6006+
srli(tmp1, tmp1, right_shift);
6007+
orr(tmp2, tmp2, tmp1);
6008+
// 4 bits
6009+
andi(tmp1, dst, 0xf);
6010+
orr(dst, tmp2, tmp1);
6011+
}
6012+
59576013
#define FCVT_SAFE(FLOATCVT, FLOATSIG) \
59586014
void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \
59596015
Label done; \

src/hotspot/cpu/riscv/macroAssembler_riscv.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1431,6 +1431,9 @@ class MacroAssembler: public Assembler {
14311431
void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp);
14321432
void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp);
14331433

1434+
// Helper routine processing the slow path of NaN when converting float to float16
1435+
void float_to_float16_NaN(Register dst, FloatRegister src, Register tmp1, Register tmp2);
1436+
14341437
// vector load/store unit-stride instructions
14351438
void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
14361439
switch (sew) {

src/hotspot/cpu/riscv/riscv.ad

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8545,7 +8545,7 @@ instruct convF2HF_reg_reg(iRegINoSp dst, fRegF src, fRegF ftmp, iRegINoSp xtmp)
85458545
instruct reinterpretS2HF(fRegF dst, iRegI src)
85468546
%{
85478547
match(Set dst (ReinterpretS2HF src));
8548-
format %{ "fmv.h.x $dst, $src" %}
8548+
format %{ "fmv.h.x $dst, $src\t# reinterpretS2HF" %}
85498549
ins_encode %{
85508550
__ fmv_h_x($dst$$FloatRegister, $src$$Register);
85518551
%}
@@ -8565,7 +8565,7 @@ instruct convF2HFAndS2HF(fRegF dst, fRegF src)
85658565
instruct reinterpretHF2S(iRegINoSp dst, fRegF src)
85668566
%{
85678567
match(Set dst (ReinterpretHF2S src));
8568-
format %{ "fmv.x.h $dst, $src" %}
8568+
format %{ "fmv.x.h $dst, $src\t# reinterpretHF2S" %}
85698569
ins_encode %{
85708570
__ fmv_x_h($dst$$Register, $src$$FloatRegister);
85718571
%}

src/hotspot/cpu/riscv/stubGenerator_riscv.cpp

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6354,18 +6354,8 @@ class StubGenerator: public StubCodeGenerator {
63546354
__ ret();
63556355

63566356
__ bind(NaN_SLOW);
6357-
__ fmv_x_w(dst, src);
6358-
6359-
// preserve the payloads of non-canonical NaNs.
6360-
__ srai(dst, dst, 13);
6361-
// preserve the sign bit.
6362-
__ srai(t1, dst, 13);
6363-
__ slli(t1, t1, 10);
6364-
__ mv(t0, 0x3ff);
6365-
__ orr(t1, t1, t0);
6366-
6367-
// get the result by merging sign bit and payloads of preserved non-canonical NaNs.
6368-
__ andr(dst, dst, t1);
6357+
6358+
__ float_to_float16_NaN(dst, src, t0, t1);
63696359

63706360
__ ret();
63716361
return entry;
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
/*
2+
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
3+
* Copyright (c) 2025, Rivos Inc. All rights reserved.
4+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5+
*
6+
* This code is free software; you can redistribute it and/or modify it
7+
* under the terms of the GNU General Public License version 2 only, as
8+
* published by the Free Software Foundation.
9+
*
10+
* This code is distributed in the hope that it will be useful, but WITHOUT
11+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13+
* version 2 for more details (a copy is included in the LICENSE file that
14+
* accompanied this code).
15+
*
16+
* You should have received a copy of the GNU General Public License version
17+
* 2 along with this work; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19+
*
20+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21+
* or visit www.oracle.com if you need additional information or have any
22+
* questions.
23+
*/
24+
25+
/*
26+
* @test
27+
* @bug 8365206
28+
* @summary Verify NaN sign and significand bits are preserved across conversions,
29+
* float -> float16 -> float
30+
* @requires (os.arch == "riscv64" & vm.cpu.features ~= ".*zfh.*")
31+
* @requires vm.compiler1.enabled & vm.compiler2.enabled
32+
* @requires vm.compMode != "Xcomp"
33+
* @library /test/lib /
34+
*
35+
* @build jdk.test.whitebox.WhiteBox
36+
* @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox
37+
* @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI
38+
* -Xmixed -XX:-BackgroundCompilation -XX:-UseOnStackReplacement
39+
* -XX:CompileThresholdScaling=1000.0 Binary16ConversionNaN_2
40+
*/
41+
42+
/*
43+
* The behavior tested below is an implementation property not
44+
* required by the specification. It would be acceptable for this
45+
* information to not be preserved (as long as a NaN is returned) if,
46+
* say, an intrinsified version using native hardware instructions
47+
* behaved differently.
48+
*
49+
* If that is the case, this test should be modified to disable
50+
* intrinsics or to otherwise not run on platforms with a differently
51+
* behaving intrinsic.
52+
*/
53+
54+
import compiler.whitebox.CompilerWhiteBoxTest;
55+
import jdk.test.whitebox.WhiteBox;
56+
import java.lang.reflect.Method;
57+
import java.util.Random;
58+
59+
public class Binary16ConversionNaN_2 {
60+
61+
private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox();
62+
63+
/*
64+
* Put all 16-bit NaN values through a conversion loop and make
65+
* sure the significand, sign, and exponent are all preserved.
66+
*/
67+
public static void main(String... argv) throws NoSuchMethodException {
68+
int errors = 0;
69+
final int NAN_EXPONENT = 0x7f80_0000;
70+
final int SIGN_BIT = 0x8000_0000;
71+
72+
// First, run with Interpreter only to collect "gold" data.
73+
// Flags -Xmixed -XX:CompileThresholdScaling=1000.0 are used
74+
// to prevent compilation during this phase.
75+
float[] pVal = new float[1024];
76+
float[] pRes = new float[1024];
77+
float[] nVal = new float[1024];
78+
float[] nRes = new float[1024];
79+
80+
Random rand = new Random();
81+
82+
// A NaN has a nonzero significand
83+
for (int i = 1; i <= 0x3ff; i++) {
84+
int shift = rand.nextInt(13+1);
85+
int binaryNaN = (NAN_EXPONENT | (i << shift));
86+
assert isNaN(binaryNaN);
87+
// the payloads of non-canonical NaNs are preserved.
88+
float f1 = Float.intBitsToFloat(binaryNaN);
89+
float f2 = testRoundTrip(f1);
90+
errors += verify(f1, f2);
91+
pVal[i] = f1;
92+
pRes[i] = f2;
93+
94+
int binaryNegNaN = (SIGN_BIT | binaryNaN);
95+
float f3 = Float.intBitsToFloat(binaryNegNaN);
96+
float f4 = testRoundTrip(f3);
97+
errors += verify(f3, f4);
98+
nVal[i] = f3;
99+
nRes[i] = f4;
100+
}
101+
if (errors > 0) { // Exit if Interpreter failed
102+
throw new RuntimeException(errors + " errors");
103+
}
104+
105+
Method test_method = Binary16ConversionNaN_2.class.getDeclaredMethod("testRoundTrip", float.class);
106+
107+
// Compile with C1 and compare results
108+
WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_SIMPLE);
109+
if (!WHITE_BOX.isMethodCompiled(test_method)) {
110+
throw new RuntimeException("test is not compiled by C1");
111+
}
112+
for (int i = 1; i <= 0x3ff; i++) {
113+
float f1 = testRoundTrip(pVal[i]);
114+
errors += verifyCompiler(pRes[i], f1, "C1");
115+
float f2 = testRoundTrip(nVal[i]);
116+
errors += verifyCompiler(nRes[i], f2, "C1");
117+
}
118+
119+
WHITE_BOX.deoptimizeMethod(test_method);
120+
121+
// Compile with C2 and compare results
122+
WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION);
123+
if (!WHITE_BOX.isMethodCompiled(test_method)) {
124+
throw new RuntimeException("test is not compiled by C2");
125+
}
126+
for (int i = 1; i <= 0x3ff; i++) {
127+
float f1 = testRoundTrip(pVal[i]);
128+
errors += verifyCompiler(pRes[i], f1, "C2");
129+
float f2 = testRoundTrip(nVal[i]);
130+
errors += verifyCompiler(nRes[i], f2, "C2");
131+
}
132+
133+
if (errors > 0) {
134+
throw new RuntimeException(errors + " errors");
135+
}
136+
}
137+
138+
private static boolean isNaN(int binary) {
139+
return ((binary & 0x7f80_0000) == 0x7f80_0000) // Max exponent and...
140+
&& ((binary & 0x007f_ffff) != 0 ); // significand nonzero.
141+
}
142+
143+
private static float testRoundTrip(float f) {
144+
short s = Float.floatToFloat16(f);
145+
return Float.float16ToFloat(s);
146+
}
147+
148+
private static int verify(float f1, float f2) {
149+
int errors = 0;
150+
int i1 = Float.floatToRawIntBits(f1);
151+
int i2 = Float.floatToRawIntBits(f2);
152+
assert Float.isNaN(f1);
153+
if (!Float.isNaN(f2) ||
154+
((i1 & 0x8000_0000) != (i2 & 0x8000_0000))) {
155+
errors++;
156+
System.out.println("Roundtrip failure on NaN value " +
157+
Integer.toHexString(i1) +
158+
"\t got back " + Integer.toHexString(i2));
159+
}
160+
return errors;
161+
}
162+
163+
private static int verifyCompiler(float f1, float f2, String name) {
164+
int errors = 0;
165+
int i1 = Float.floatToRawIntBits(f1);
166+
int i2 = Float.floatToRawIntBits(f2);
167+
assert Float.isNaN(f1);
168+
if (!Float.isNaN(f2) ||
169+
((i1 & 0x8000_0000) != (i2 & 0x8000_0000))) {
170+
errors++;
171+
System.out.println("Roundtrip failure on NaN value " +
172+
Integer.toHexString(i1) +
173+
"\t got back " + Integer.toHexString(i2) +
174+
"\t from " + name + " code");
175+
}
176+
return errors;
177+
}
178+
}

0 commit comments

Comments
 (0)