Skip to content

Commit 8d05693

Browse files
feilongjiangRealFYang
authored and committed
8279213: riscv: RVB: Add zero/sign extend instructions
Reviewed-by: fyang
1 parent 1fcf564 commit 8d05693

15 files changed

+180
-90
lines changed

src/hotspot/cpu/riscv/assembler_riscv.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
#include "asm/register.hpp"
3131
#include "assembler_riscv.inline.hpp"
3232

33-
#define registerSize 64
33+
#define XLEN 64
3434

3535
// definitions of various symbolic names for machine registers
3636

@@ -1257,6 +1257,7 @@ enum operand_size { int8, int16, int32, uint32, int64 };
12571257
}
12581258

12591259
#include "assembler_riscv_v.hpp"
1260+
#include "assembler_riscv_b.hpp"
12601261

12611262
virtual ~Assembler() {}
12621263

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
/*
2+
* Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
3+
* Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
4+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5+
*
6+
* This code is free software; you can redistribute it and/or modify it
7+
* under the terms of the GNU General Public License version 2 only, as
8+
* published by the Free Software Foundation.
9+
*
10+
* This code is distributed in the hope that it will be useful, but WITHOUT
11+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13+
* version 2 for more details (a copy is included in the LICENSE file that
14+
* accompanied this code).
15+
*
16+
* You should have received a copy of the GNU General Public License version
17+
* 2 along with this work; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19+
*
20+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21+
* or visit www.oracle.com if you need additional information or have any
22+
* questions.
23+
*
24+
*/
25+
26+
#ifndef CPU_RISCV_ASSEMBLER_RISCV_B_HPP
27+
#define CPU_RISCV_ASSEMBLER_RISCV_B_HPP
28+
29+
#define INSN(NAME, op, funct3, funct7) \
30+
void NAME(Register Rd, Register Rs1, Register Rs2) { \
31+
unsigned insn = 0; \
32+
patch((address)&insn, 6, 0, op); \
33+
patch((address)&insn, 14, 12, funct3); \
34+
patch((address)&insn, 31, 25, funct7); \
35+
patch_reg((address)&insn, 7, Rd); \
36+
patch_reg((address)&insn, 15, Rs1); \
37+
patch_reg((address)&insn, 20, Rs2); \
38+
emit(insn); \
39+
}
40+
41+
INSN(add_uw, 0b0111011, 0b000, 0b0000100);
42+
43+
#undef INSN
44+
45+
#define INSN(NAME, op, funct3, funct12) \
46+
void NAME(Register Rd, Register Rs1) { \
47+
unsigned insn = 0; \
48+
patch((address)&insn, 6, 0, op); \
49+
patch((address)&insn, 14, 12, funct3); \
50+
patch((address)&insn, 31, 20, funct12); \
51+
patch_reg((address)&insn, 7, Rd); \
52+
patch_reg((address)&insn, 15, Rs1); \
53+
emit(insn); \
54+
}
55+
56+
INSN(sext_b, 0b0010011, 0b001, 0b011000000100);
57+
INSN(sext_h, 0b0010011, 0b001, 0b011000000101);
58+
INSN(zext_h, 0b0111011, 0b100, 0b000010000000);
59+
60+
#undef INSN
61+
62+
// RVB pseudo instructions
63+
// zero extend word
64+
// Pseudo instruction: emitted as add_uw with zr as the second source operand.
void zext_w(Register Rd, Register Rs) { add_uw(Rd, Rs, zr); }
67+
68+
69+
#endif // CPU_RISCV_ASSEMBLER_RISCV_B_HPP

src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,7 @@ void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right,
6464
if (is_imm_in_range(c - 1, 12, 0)) {
6565
__ andi(t1, t1, c - 1);
6666
} else {
67-
__ slli(t1, t1, registerSize - shift);
68-
__ srli(t1, t1, registerSize - shift);
67+
__ zero_extend(t1, t1, shift);
6968
}
7069
__ subw(dreg, t1, t0);
7170
}
@@ -79,8 +78,7 @@ void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right,
7978
if (is_imm_in_range(c - 1, 12, 0)) {
8079
__ andi(t0, t0, c - 1);
8180
} else {
82-
__ slli(t0, t0, registerSize - shift);
83-
__ srli(t0, t0, registerSize - shift);
81+
__ zero_extend(t0, t0, shift);
8482
}
8583
__ addw(dreg, t0, lreg);
8684
__ sraiw(dreg, dreg, shift);
@@ -203,8 +201,7 @@ void LIR_Assembler::arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr rig
203201
if (is_imm_in_range(c - 1, 12, 0)) {
204202
__ andi(t0, t0, c - 1);
205203
} else {
206-
__ slli(t0, t0, registerSize - shift);
207-
__ srli(t0, t0, registerSize - shift);
204+
__ zero_extend(t0, t0, shift);
208205
}
209206
__ add(dreg, t0, lreg_lo);
210207
__ srai(dreg, dreg, shift);
@@ -223,8 +220,7 @@ void LIR_Assembler::arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr rig
223220
if (is_imm_in_range(c - 1, 12, 0)) {
224221
__ andi(t1, t1, c - 1);
225222
} else {
226-
__ slli(t1, t1, registerSize - shift);
227-
__ srli(t1, t1, registerSize - shift);
223+
__ zero_extend(t1, t1, shift);
228224
}
229225
__ sub(dreg, t1, t0);
230226
}

src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -951,13 +951,13 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) {
951951
case Bytecodes::_d2f:
952952
__ fcvt_s_d(dest->as_float_reg(), src->as_double_reg()); break;
953953
case Bytecodes::_i2c:
954-
__ zero_ext(dest->as_register(), src->as_register(), registerSize - 16); break; // 16: char size
954+
__ zero_extend(dest->as_register(), src->as_register(), 16); break;
955955
case Bytecodes::_i2l:
956956
__ addw(dest->as_register_lo(), src->as_register(), zr); break;
957957
case Bytecodes::_i2s:
958-
__ sign_ext(dest->as_register(), src->as_register(), registerSize - 16); break; // 16: short size
958+
__ sign_extend(dest->as_register(), src->as_register(), 16); break;
959959
case Bytecodes::_i2b:
960-
__ sign_ext(dest->as_register(), src->as_register(), registerSize - 8); break; // 8: byte size
960+
__ sign_extend(dest->as_register(), src->as_register(), 8); break;
961961
case Bytecodes::_l2i:
962962
_masm->block_comment("FIXME: This coulde be no-op");
963963
__ addw(dest->as_register(), src->as_register_lo(), zr); break;

src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -819,8 +819,8 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
819819
__ sll(arr_size, length, t0);
820820
int lh_header_size_width = log2i_exact(Klass::_lh_header_size_mask + 1);
821821
int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width;
822-
__ slli(tmp1, tmp1, registerSize - lh_header_size_msb);
823-
__ srli(tmp1, tmp1, registerSize - lh_header_size_width);
822+
__ slli(tmp1, tmp1, XLEN - lh_header_size_msb);
823+
__ srli(tmp1, tmp1, XLEN - lh_header_size_width);
824824
__ add(arr_size, arr_size, tmp1);
825825
__ addi(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up
826826
__ andi(arr_size, arr_size, ~(uint)MinObjAlignmentInBytesMask);

src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -450,10 +450,10 @@ void C2_MacroAssembler::string_indexof(Register haystack, Register needle,
450450
// convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d
451451
// We'll have to wait until load completed, but it's still faster than per-character loads+checks
452452
srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a
453-
slli(ch2, tmp6, registerSize - 24);
454-
srli(ch2, ch2, registerSize - 8); // pattern[m-2], 0x0000000b
455-
slli(ch1, tmp6, registerSize - 16);
456-
srli(ch1, ch1, registerSize - 8); // pattern[m-3], 0x0000000c
453+
slli(ch2, tmp6, XLEN - 24);
454+
srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b
455+
slli(ch1, tmp6, XLEN - 16);
456+
srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c
457457
andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d
458458
slli(ch2, ch2, 16);
459459
orr(ch2, ch2, ch1); // 0x00000b0c

src/hotspot/cpu/riscv/globals_riscv.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ define_pd_global(intx, InlineSmallCode, 1000);
9090
"Extend fence.i to fence.i + fence.") \
9191
product(bool, AvoidUnalignedAccesses, true, \
9292
"Avoid generating unaligned memory accesses") \
93-
product(bool, UseRVV, false, EXPERIMENTAL, "Use RVV instructions")
93+
product(bool, UseRVV, false, EXPERIMENTAL, "Use RVV instructions") \
94+
product(bool, UseRVB, false, EXPERIMENTAL, "Use RVB instructions")
9495

9596
#endif // CPU_RISCV_GLOBALS_RISCV_HPP

src/hotspot/cpu/riscv/interp_masm_riscv.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -67,17 +67,17 @@ void InterpreterMacroAssembler::narrow(Register result) {
6767
bind(notBool);
6868
mv(t1, T_BYTE);
6969
bne(t0, t1, notByte);
70-
sign_ext(result, result, registerSize - 8);
70+
sign_extend(result, result, 8);
7171
j(done);
7272

7373
bind(notByte);
7474
mv(t1, T_CHAR);
7575
bne(t0, t1, notChar);
76-
zero_ext(result, result, registerSize - 16); // turncate upper 48 bits
76+
zero_extend(result, result, 16);
7777
j(done);
7878

7979
bind(notChar);
80-
sign_ext(result, result, registerSize - 16); // sign-extend short
80+
sign_extend(result, result, 16);
8181

8282
// Nothing to do for T_INT
8383
bind(done);
@@ -250,8 +250,8 @@ void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register
250250
lwu(bytecode, bytecode);
251251
membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
252252
const int shift_count = (1 + byte_no) * BitsPerByte;
253-
slli(bytecode, bytecode, registerSize - (shift_count + BitsPerByte));
254-
srli(bytecode, bytecode, registerSize - BitsPerByte);
253+
slli(bytecode, bytecode, XLEN - (shift_count + BitsPerByte));
254+
srli(bytecode, bytecode, XLEN - BitsPerByte);
255255
}
256256

257257
void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache,

src/hotspot/cpu/riscv/macroAssembler_riscv.cpp

Lines changed: 47 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -683,6 +683,10 @@ void MacroAssembler::sext_w(Register Rd, Register Rs) {
683683
addiw(Rd, Rs, 0);
684684
}
685685

686+
// Zero-extend byte: keeps only the low 8 bits of Rs by and-ing with 0xFF,
// writing the result to Rd.
void MacroAssembler::zext_b(Register Rd, Register Rs) {
  const int low_byte_mask = 0xFF;
  andi(Rd, Rs, low_byte_mask);
}
689+
686690
void MacroAssembler::seqz(Register Rd, Register Rs) {
687691
sltiu(Rd, Rs, 1);
688692
}
@@ -1902,7 +1906,7 @@ void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register
19021906

19031907
if (((uint64_t)(uintptr_t)CompressedKlassPointers::base() & 0xffffffff) == 0 &&
19041908
CompressedKlassPointers::shift() == 0) {
1905-
zero_ext(dst, src, 32); // clear upper 32 bits
1909+
zero_extend(dst, src, 32);
19061910
return;
19071911
}
19081912

@@ -2219,7 +2223,7 @@ void MacroAssembler::load_reserved(Register addr,
22192223
break;
22202224
case uint32:
22212225
lr_w(t0, addr, acquire);
2222-
clear_upper_bits(t0, 32);
2226+
zero_extend(t0, t0, 32);
22232227
break;
22242228
default:
22252229
ShouldNotReachHere();
@@ -2262,7 +2266,7 @@ void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expecte
22622266
} else {
22632267
// size == int16 case
22642268
addi(mask, zr, -1);
2265-
zero_ext(mask, mask, registerSize - 16);
2269+
zero_extend(mask, mask, 16);
22662270
}
22672271
sll(mask, mask, shift);
22682272

@@ -2315,10 +2319,10 @@ void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected,
23152319
srl(result, tmp, shift);
23162320

23172321
if (size == int8) {
2318-
sign_ext(result, result, registerSize - 8);
2322+
sign_extend(result, result, 8);
23192323
} else {
23202324
// size == int16 case
2321-
sign_ext(result, result, registerSize - 16);
2325+
sign_extend(result, result, 16);
23222326
}
23232327
}
23242328
}
@@ -2448,7 +2452,7 @@ ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl)
24482452
#define ATOMIC_XCHGU(OP1, OP2) \
24492453
void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \
24502454
atomic_##OP2(prev, newv, addr); \
2451-
clear_upper_bits(prev, 32); \
2455+
zero_extend(prev, prev, 32); \
24522456
return; \
24532457
}
24542458

@@ -2827,7 +2831,7 @@ void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
28272831
RelocationHolder rspec = oop_Relocation::spec(oop_index);
28282832
code_section()->relocate(inst_mark(), rspec);
28292833
li32(dst, 0xDEADBEEF);
2830-
clear_upper_bits(dst, 32); // clear upper 32bit, do not sign extend.
2834+
zero_extend(dst, dst, 32);
28312835
}
28322836

28332837
void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
@@ -2841,7 +2845,7 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
28412845
code_section()->relocate(inst_mark(), rspec);
28422846
narrowKlass nk = CompressedKlassPointers::encode(k);
28432847
li32(dst, nk);
2844-
clear_upper_bits(dst, 32); // clear upper 32bit, do not sign extend.
2848+
zero_extend(dst, dst, 32);
28452849
}
28462850

28472851
// Maybe emit a call via a trampoline. If the code cache is small
@@ -3050,7 +3054,7 @@ void MacroAssembler::mul_add(Register out, Register in, Register offset,
30503054
mv(tmp, out);
30513055
mv(out, zr);
30523056
beqz(len, L_end);
3053-
zero_ext(k, k, 32);
3057+
zero_extend(k, k, 32);
30543058
slli(t0, offset, LogBytesPerInt);
30553059
add(offset, tmp, t0);
30563060
slli(t0, len, LogBytesPerInt);
@@ -3427,14 +3431,42 @@ void MacroAssembler::zero_memory(Register addr, Register len, Register tmp1) {
34273431
bnez(len, loop);
34283432
}
34293433

3430-
void MacroAssembler::zero_ext(Register dst, Register src, int clear_bits) {
3431-
slli(dst, src, clear_bits);
3432-
srli(dst, dst, clear_bits);
3434+
// Zero-extend the low `bits` bits of src into dst.
//
// Instruction selection:
//   bits == 16 or 32, UseRVB set -> single RVB instruction (zext_h / zext_w)
//   bits == 8                    -> zext_b, regardless of UseRVB
//   anything else                -> shift left then logical shift right by
//                                   (XLEN - bits)
void MacroAssembler::zero_extend(Register dst, Register src, int bits) {
  switch (bits) {
    case 16:
      if (UseRVB) {
        zext_h(dst, src);
        return;
      }
      break;
    case 32:
      if (UseRVB) {
        zext_w(dst, src);
        return;
      }
      break;
    case 8:
      zext_b(dst, src);
      return;
    default:
      break;
  }

  // Generic fallback: push the wanted bits to the top, then shift zeros in.
  const int shift = XLEN - bits;
  slli(dst, src, shift);
  srli(dst, dst, shift);
}
34343452

3435-
void MacroAssembler::sign_ext(Register dst, Register src, int clear_bits) {
3436-
slli(dst, src, clear_bits);
3437-
srai(dst, dst, clear_bits);
3453+
// Sign-extend the low `bits` bits of src into dst.
//
// Instruction selection:
//   bits == 8 or 16, UseRVB set -> single RVB instruction (sext_b / sext_h)
//   bits == 32                  -> sext_w, regardless of UseRVB
//   anything else               -> shift left then arithmetic shift right by
//                                  (XLEN - bits)
void MacroAssembler::sign_extend(Register dst, Register src, int bits) {
  switch (bits) {
    case 8:
      if (UseRVB) {
        sext_b(dst, src);
        return;
      }
      break;
    case 16:
      if (UseRVB) {
        sext_h(dst, src);
        return;
      }
      break;
    case 32:
      sext_w(dst, src);
      return;
    default:
      break;
  }

  // Generic fallback: push the wanted bits to the top, then shift the sign in.
  const int shift = XLEN - bits;
  slli(dst, src, shift);
  srai(dst, dst, shift);
}
34393471

34403472
void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp)

0 commit comments

Comments
 (0)