Skip to content

Commit 9977a36

Browse files
author
duke
committed
Backport 32df2d1
1 parent 6c48f4e commit 9977a36

File tree

3 files changed

+66
-20
lines changed

3 files changed

+66
-20
lines changed

src/hotspot/cpu/riscv/assembler_riscv.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1988,6 +1988,7 @@ enum VectorMask {
19881988

19891989
// Vector Narrowing Integer Right Shift Instructions
19901990
INSN(vnsra_wi, 0b1010111, 0b011, 0b101101);
1991+
INSN(vnsrl_wi, 0b1010111, 0b011, 0b101100);
19911992

19921993
#undef INSN
19931994

src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp

Lines changed: 55 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2491,41 +2491,80 @@ static void float_to_float16_v_slow_path(C2_MacroAssembler& masm,
24912491
#define __ masm.
24922492
VectorRegister dst = stub.data<0>();
24932493
VectorRegister src = stub.data<1>();
2494-
VectorRegister tmp = stub.data<2>();
2494+
VectorRegister vtmp = stub.data<2>();
2495+
assert_different_registers(dst, src, vtmp);
2496+
24952497
__ bind(stub.entry());
24962498

2499+
// Active elements (NaNs) are marked in v0 mask register.
24972500
// mul is already set to mf2 in float_to_float16_v.
24982501

2499-
// preserve the payloads of non-canonical NaNs.
2500-
__ vnsra_wi(dst, src, 13, Assembler::v0_t);
2501-
2502-
// preserve the sign bit.
2503-
__ vnsra_wi(tmp, src, 26, Assembler::v0_t);
2504-
__ vsll_vi(tmp, tmp, 10, Assembler::v0_t);
2505-
__ mv(t0, 0x3ff);
2506-
__ vor_vx(tmp, tmp, t0, Assembler::v0_t);
2507-
2508-
// get the result by merging sign bit and payloads of preserved non-canonical NaNs.
2509-
__ vand_vv(dst, dst, tmp, Assembler::v0_t);
2502+
// Float (32 bits)
2503+
// Bit: 31 30 to 23 22 to 0
2504+
// +---+------------------+-----------------------------+
2505+
// | S | Exponent | Mantissa (Fraction) |
2506+
// +---+------------------+-----------------------------+
2507+
// 1 bit 8 bits 23 bits
2508+
//
2509+
// Float (16 bits)
2510+
// Bit: 15 14 to 10 9 to 0
2511+
// +---+----------------+------------------+
2512+
// | S | Exponent | Mantissa |
2513+
// +---+----------------+------------------+
2514+
// 1 bit 5 bits 10 bits
2515+
const int fp_sign_bits = 1;
2516+
const int fp32_bits = 32;
2517+
const int fp32_mantissa_2nd_part_bits = 9;
2518+
const int fp32_mantissa_3rd_part_bits = 4;
2519+
const int fp16_exponent_bits = 5;
2520+
const int fp16_mantissa_bits = 10;
2521+
2522+
// preserve the sign bit and exponent, clear mantissa.
2523+
__ vnsra_wi(dst, src, fp32_bits - fp_sign_bits - fp16_exponent_bits, Assembler::v0_t);
2524+
__ vsll_vi(dst, dst, fp16_mantissa_bits, Assembler::v0_t);
2525+
2526+
// Preserve high order bit of float NaN in the
2527+
// binary16 result NaN (tenth bit); OR in remaining
2528+
// bits into lower 9 bits of binary 16 significand.
2529+
// | (doppel & 0x007f_e000) >> 13 // 10 bits
2530+
// | (doppel & 0x0000_1ff0) >> 4 // 9 bits
2531+
// | (doppel & 0x0000_000f)); // 4 bits
2532+
//
2533+
// Check j.l.Float.floatToFloat16 for more information.
2534+
// 10 bits
2535+
__ vnsrl_wi(vtmp, src, fp32_mantissa_2nd_part_bits + fp32_mantissa_3rd_part_bits, Assembler::v0_t);
2536+
__ mv(t0, 0x3ff); // retain first part of mantissa in a float 32
2537+
__ vand_vx(vtmp, vtmp, t0, Assembler::v0_t);
2538+
__ vor_vv(dst, dst, vtmp, Assembler::v0_t);
2539+
// 9 bits
2540+
__ vnsrl_wi(vtmp, src, fp32_mantissa_3rd_part_bits, Assembler::v0_t);
2541+
__ mv(t0, 0x1ff); // retain second part of mantissa in a float 32
2542+
__ vand_vx(vtmp, vtmp, t0, Assembler::v0_t);
2543+
__ vor_vv(dst, dst, vtmp, Assembler::v0_t);
2544+
// 4 bits
2545+
// A narrowing shift (by 0) is needed to move the data from 32-bit elements to 16-bit elements in the vector register.
2546+
__ vnsrl_wi(vtmp, src, 0, Assembler::v0_t);
2547+
__ vand_vi(vtmp, vtmp, 0xf, Assembler::v0_t);
2548+
__ vor_vv(dst, dst, vtmp, Assembler::v0_t);
25102549

25112550
__ j(stub.continuation());
25122551
#undef __
25132552
}
25142553

25152554
// j.l.Float.floatToFloat16
2516-
void C2_MacroAssembler::float_to_float16_v(VectorRegister dst, VectorRegister src, VectorRegister vtmp,
2517-
Register tmp, uint vector_length) {
2555+
void C2_MacroAssembler::float_to_float16_v(VectorRegister dst, VectorRegister src,
2556+
VectorRegister vtmp, Register tmp, uint vector_length) {
25182557
assert_different_registers(dst, src, vtmp);
25192558

25202559
auto stub = C2CodeStub::make<VectorRegister, VectorRegister, VectorRegister>
2521-
(dst, src, vtmp, 28, float_to_float16_v_slow_path);
2560+
(dst, src, vtmp, 56, float_to_float16_v_slow_path);
25222561

25232562
// On riscv, NaNs need special handling because vfncvt_f_f_w does not work in that case.
25242563

25252564
vsetvli_helper(BasicType::T_FLOAT, vector_length, Assembler::m1);
25262565

25272566
// check whether there is a NaN.
2528-
// replace v_fclass with vmseq_vv as performance optimization.
2567+
// replace v_fclass with vmfne_vv as performance optimization.
25292568
vmfne_vv(v0, src, src);
25302569
vcpop_m(t0, v0);
25312570

test/hotspot/jtreg/compiler/vectorization/TestFloatConversionsVectorNaN.java

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
/**
2525
* @test
26+
* @key randomness
2627
* @bug 8320646
2728
* @summary Auto-vectorize Float.floatToFloat16, Float.float16ToFloat APIs, with NaN
2829
* @requires vm.compiler2.enabled
@@ -37,9 +38,11 @@
3738
package compiler.vectorization;
3839

3940
import java.util.HexFormat;
41+
import java.util.Random;
4042

4143
import compiler.lib.ir_framework.*;
4244
import jdk.test.lib.Asserts;
45+
import jdk.test.lib.Utils;
4346

4447
public class TestFloatConversionsVectorNaN {
4548
private static final int ARRLEN = 1024;
@@ -79,14 +82,16 @@ public void test_float_float16(short[] sout, float[] finp) {
7982

8083
@Run(test = {"test_float_float16"}, mode = RunMode.STANDALONE)
8184
public void kernel_test_float_float16() {
85+
Random rand = Utils.getRandomInstance();
8286
int errno = 0;
8387
finp = new float[ARRLEN];
8488
sout = new short[ARRLEN];
8589

8690
// Setup
8791
for (int i = 0; i < ARRLEN; i++) {
88-
if (i%39 == 0) {
89-
int x = 0x7f800000 + ((i/39) << 13);
92+
if (i%3 == 0) {
93+
int shift = rand.nextInt(13+1);
94+
int x = 0x7f800000 + ((i/39) << shift);
9095
x = (i%2 == 0) ? x : (x | 0x80000000);
9196
finp[i] = Float.intBitsToFloat(x);
9297
} else {
@@ -128,7 +133,8 @@ public void kernel_test_float_float16() {
128133

129134
static int assertEquals(int idx, float f, short expected, short actual) {
130135
HexFormat hf = HexFormat.of();
131-
String msg = "floatToFloat16 wrong result: idx: " + idx + ", \t" + f +
136+
String msg = "floatToFloat16 wrong result: idx: " + idx +
137+
", \t" + f + ", hex: " + Integer.toHexString(Float.floatToRawIntBits(f)) +
132138
",\t expected: " + hf.toHexDigits(expected) +
133139
",\t actual: " + hf.toHexDigits(actual);
134140
if ((expected & 0x7c00) != 0x7c00) {
@@ -167,7 +173,7 @@ public void kernel_test_float16_float() {
167173

168174
// Setup
169175
for (int i = 0; i < ARRLEN; i++) {
170-
if (i%39 == 0) {
176+
if (i%3 == 0) {
171177
int x = 0x7c00 + i;
172178
x = (i%2 == 0) ? x : (x | 0x8000);
173179
sinp[i] = (short)x;

0 commit comments

Comments
 (0)