Skip to content

Commit d9dfb67

Browse files
author
duke
committed
Backport 32df2d17f3c0407ad7e90eacfdc0fd7a65f67551
1 parent eead543 commit d9dfb67

File tree

3 files changed

+66
-20
lines changed

3 files changed

+66
-20
lines changed

src/hotspot/cpu/riscv/assembler_riscv.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1988,6 +1988,7 @@ enum VectorMask {
19881988

19891989
// Vector Narrowing Integer Right Shift Instructions
19901990
INSN(vnsra_wi, 0b1010111, 0b011, 0b101101);
1991+
INSN(vnsrl_wi, 0b1010111, 0b011, 0b101100);
19911992

19921993
#undef INSN
19931994

src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp

Lines changed: 55 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2489,41 +2489,80 @@ static void float_to_float16_v_slow_path(C2_MacroAssembler& masm,
24892489
#define __ masm.
24902490
VectorRegister dst = stub.data<0>();
24912491
VectorRegister src = stub.data<1>();
2492-
VectorRegister tmp = stub.data<2>();
2492+
VectorRegister vtmp = stub.data<2>();
2493+
assert_different_registers(dst, src, vtmp);
2494+
24932495
__ bind(stub.entry());
24942496

2497+
// Active elements (NaNs) are marked in v0 mask register.
24952498
// mul is already set to mf2 in float_to_float16_v.
24962499

2497-
// preserve the payloads of non-canonical NaNs.
2498-
__ vnsra_wi(dst, src, 13, Assembler::v0_t);
2499-
2500-
// preserve the sign bit.
2501-
__ vnsra_wi(tmp, src, 26, Assembler::v0_t);
2502-
__ vsll_vi(tmp, tmp, 10, Assembler::v0_t);
2503-
__ mv(t0, 0x3ff);
2504-
__ vor_vx(tmp, tmp, t0, Assembler::v0_t);
2505-
2506-
// get the result by merging sign bit and payloads of preserved non-canonical NaNs.
2507-
__ vand_vv(dst, dst, tmp, Assembler::v0_t);
2500+
// Float (32 bits)
2501+
// Bit: 31 30 to 23 22 to 0
2502+
// +---+------------------+-----------------------------+
2503+
// | S | Exponent | Mantissa (Fraction) |
2504+
// +---+------------------+-----------------------------+
2505+
// 1 bit 8 bits 23 bits
2506+
//
2507+
// Float (16 bits)
2508+
// Bit: 15 14 to 10 9 to 0
2509+
// +---+----------------+------------------+
2510+
// | S | Exponent | Mantissa |
2511+
// +---+----------------+------------------+
2512+
// 1 bit 5 bits 10 bits
2513+
const int fp_sign_bits = 1;
2514+
const int fp32_bits = 32;
2515+
const int fp32_mantissa_2nd_part_bits = 9;
2516+
const int fp32_mantissa_3rd_part_bits = 4;
2517+
const int fp16_exponent_bits = 5;
2518+
const int fp16_mantissa_bits = 10;
2519+
2520+
// preserve the sign bit and exponent, clear mantissa.
2521+
__ vnsra_wi(dst, src, fp32_bits - fp_sign_bits - fp16_exponent_bits, Assembler::v0_t);
2522+
__ vsll_vi(dst, dst, fp16_mantissa_bits, Assembler::v0_t);
2523+
2524+
// Preserve high order bit of float NaN in the
2525+
// binary16 result NaN (tenth bit); OR in remaining
2526+
// bits into the lower 9 bits of the binary16 significand.
2527+
// | (doppel & 0x007f_e000) >> 13 // 10 bits
2528+
// | (doppel & 0x0000_1ff0) >> 4 // 9 bits
2529+
// | (doppel & 0x0000_000f)); // 4 bits
2530+
//
2531+
// Check j.l.Float.floatToFloat16 for more information.
2532+
// 10 bits
2533+
__ vnsrl_wi(vtmp, src, fp32_mantissa_2nd_part_bits + fp32_mantissa_3rd_part_bits, Assembler::v0_t);
2534+
__ mv(t0, 0x3ff); // retain first part of mantissa in a float 32
2535+
__ vand_vx(vtmp, vtmp, t0, Assembler::v0_t);
2536+
__ vor_vv(dst, dst, vtmp, Assembler::v0_t);
2537+
// 9 bits
2538+
__ vnsrl_wi(vtmp, src, fp32_mantissa_3rd_part_bits, Assembler::v0_t);
2539+
__ mv(t0, 0x1ff); // retain second part of mantissa in a float 32
2540+
__ vand_vx(vtmp, vtmp, t0, Assembler::v0_t);
2541+
__ vor_vv(dst, dst, vtmp, Assembler::v0_t);
2542+
// 4 bits
2543+
// A narrowing shift (by 0) is needed to move the data from 32-bit elements to 16-bit elements in the vector register.
2544+
__ vnsrl_wi(vtmp, src, 0, Assembler::v0_t);
2545+
__ vand_vi(vtmp, vtmp, 0xf, Assembler::v0_t);
2546+
__ vor_vv(dst, dst, vtmp, Assembler::v0_t);
25082547

25092548
__ j(stub.continuation());
25102549
#undef __
25112550
}
25122551

25132552
// j.l.Float.floatToFloat16
2514-
void C2_MacroAssembler::float_to_float16_v(VectorRegister dst, VectorRegister src, VectorRegister vtmp,
2515-
Register tmp, uint vector_length) {
2553+
void C2_MacroAssembler::float_to_float16_v(VectorRegister dst, VectorRegister src,
2554+
VectorRegister vtmp, Register tmp, uint vector_length) {
25162555
assert_different_registers(dst, src, vtmp);
25172556

25182557
auto stub = C2CodeStub::make<VectorRegister, VectorRegister, VectorRegister>
2519-
(dst, src, vtmp, 28, float_to_float16_v_slow_path);
2558+
(dst, src, vtmp, 56, float_to_float16_v_slow_path);
25202559

25212560
// On riscv, NaNs need special handling, as vfncvt_f_f_w does not work in that case.
25222561

25232562
vsetvli_helper(BasicType::T_FLOAT, vector_length, Assembler::m1);
25242563

25252564
// check whether there is a NaN.
2526-
// replace v_fclass with vmseq_vv as performance optimization.
2565+
// replace v_fclass with vmfne_vv as performance optimization.
25272566
vmfne_vv(v0, src, src);
25282567
vcpop_m(t0, v0);
25292568

test/hotspot/jtreg/compiler/vectorization/TestFloatConversionsVectorNaN.java

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
/**
2525
* @test
26+
* @key randomness
2627
* @bug 8320646
2728
* @summary Auto-vectorize Float.floatToFloat16, Float.float16ToFloat APIs, with NaN
2829
* @requires vm.compiler2.enabled
@@ -37,9 +38,11 @@
3738
package compiler.vectorization;
3839

3940
import java.util.HexFormat;
41+
import java.util.Random;
4042

4143
import compiler.lib.ir_framework.*;
4244
import jdk.test.lib.Asserts;
45+
import jdk.test.lib.Utils;
4346

4447
public class TestFloatConversionsVectorNaN {
4548
private static final int ARRLEN = 1024;
@@ -79,14 +82,16 @@ public void test_float_float16(short[] sout, float[] finp) {
7982

8083
@Run(test = {"test_float_float16"}, mode = RunMode.STANDALONE)
8184
public void kernel_test_float_float16() {
85+
Random rand = Utils.getRandomInstance();
8286
int errno = 0;
8387
finp = new float[ARRLEN];
8488
sout = new short[ARRLEN];
8589

8690
// Setup
8791
for (int i = 0; i < ARRLEN; i++) {
88-
if (i%39 == 0) {
89-
int x = 0x7f800000 + ((i/39) << 13);
92+
if (i%3 == 0) {
93+
int shift = rand.nextInt(13+1);
94+
int x = 0x7f800000 + ((i/39) << shift);
9095
x = (i%2 == 0) ? x : (x | 0x80000000);
9196
finp[i] = Float.intBitsToFloat(x);
9297
} else {
@@ -128,7 +133,8 @@ public void kernel_test_float_float16() {
128133

129134
static int assertEquals(int idx, float f, short expected, short actual) {
130135
HexFormat hf = HexFormat.of();
131-
String msg = "floatToFloat16 wrong result: idx: " + idx + ", \t" + f +
136+
String msg = "floatToFloat16 wrong result: idx: " + idx +
137+
", \t" + f + ", hex: " + Integer.toHexString(Float.floatToRawIntBits(f)) +
132138
",\t expected: " + hf.toHexDigits(expected) +
133139
",\t actual: " + hf.toHexDigits(actual);
134140
if ((expected & 0x7c00) != 0x7c00) {
@@ -167,7 +173,7 @@ public void kernel_test_float16_float() {
167173

168174
// Setup
169175
for (int i = 0; i < ARRLEN; i++) {
170-
if (i%39 == 0) {
176+
if (i%3 == 0) {
171177
int x = 0x7c00 + i;
172178
x = (i%2 == 0) ? x : (x | 0x8000);
173179
sinp[i] = (short)x;

0 commit comments

Comments
 (0)