Skip to content

Commit 3c6ffca

Browse files
author
Hamlin Li
committed
8318219: RISC-V: C2 ExpandBits
Reviewed-by: fyang
1 parent 77a6966 commit 3c6ffca

File tree

5 files changed

+163
-3
lines changed

5 files changed

+163
-3
lines changed

src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1714,6 +1714,45 @@ void C2_MacroAssembler::compress_bits_l_v(Register dst, Register src, Register m
17141714
compress_bits_v(dst, src, mask, /* is_long */ true);
17151715
}
17161716

1717+
// Emits the RVV instruction sequence implementing j.l.Integer/Long::expand:
// the low-order bits of src are placed, in order, at the bit positions where
// mask has a 1 bit; every other bit of dst is 0.
//   dst     - general register that receives the expanded value
//   src     - general register holding the bits to distribute
//   mask    - general register selecting the destination bit positions
//   is_long - true for the 64-bit (long) form, false for the 32-bit (int) form
// Scratch registers written here: t0, v0, and the vector register groups
// starting at v4, v8 and v12 (2 registers each for int, 4 each for long).
void C2_MacroAssembler::expand_bits_v(Register dst, Register src, Register mask, bool is_long) {
1718+
// Element width used when moving the scalar operands into / out of v0.
Assembler::SEW sew = is_long ? Assembler::e64 : Assembler::e32;
1719+
// The intrinsic is only enabled when MaxVectorSize >= 16, so a register group
// of m4 (long) or m2 (int) is large enough to hold one byte per bit.
1720+
Assembler::LMUL lmul = is_long ? Assembler::m4 : Assembler::m2;
1721+
// Number of bits to process, i.e. the number of e8 elements used below.
long len = is_long ? 64 : 32;
1722+
1723+
// Load the src data (in bits) to be expanded into v0, where it acts as a mask.
1724+
vsetivli(x0, 1, sew, Assembler::m1);
1725+
vmv_s_x(v0, src);
1726+
// Reset the src data (in bytes, group starting at v4) to zero;
// t0 holds the element count and is reused by the later vsetvli.
1727+
mv(t0, len);
1728+
vsetvli(x0, t0, Assembler::e8, lmul);
1729+
vmv_v_i(v4, 0);
1730+
// Convert the src data from bits to bytes: byte i becomes 1 where bit i of src is set.
1731+
vmerge_vim(v4, v4, 1); // v0 as implicit mask register
1732+
// Reset the dst data (in bytes, group starting at v12) to zero, so positions
// not selected by the mask stay 0.
1733+
vmv_v_i(v12, 0);
1734+
// Load the mask data (in bits) into v0, replacing the src bits held there.
1735+
vsetivli(x0, 1, sew, Assembler::m1);
1736+
vmv_s_x(v0, mask);
1737+
// Expand the src data (in bytes) to dst (in bytes): viota.m gives, for each
// position i, the count of mask bits set below i, which is the index of the
// src bit that belongs at position i; the masked gather then copies that src
// byte into every position whose mask bit is set.
1738+
vsetvli(x0, t0, Assembler::e8, lmul);
1739+
viota_m(v8, v0);
1740+
vrgather_vv(v12, v4, v8, VectorMask::v0_t); // v0 as implicit mask register
1741+
// Convert the dst data from bytes back to bits: bit i of v0 is set where byte i equals 1.
1742+
vmseq_vi(v0, v12, 1);
1743+
// Store the result back: move the packed bits from element 0 of v0 into dst.
1744+
vsetivli(x0, 1, sew, Assembler::m1);
1745+
vmv_x_s(dst, v0);
1746+
}
1747+
1748+
// 32-bit (int) form of bit expansion: forwards to expand_bits_v with is_long == false.
void C2_MacroAssembler::expand_bits_i_v(Register dst, Register src, Register mask) {
1749+
expand_bits_v(dst, src, mask, /* is_long */ false);
1750+
}
1751+
1752+
// 64-bit (long) form of bit expansion: forwards to expand_bits_v with is_long == true.
void C2_MacroAssembler::expand_bits_l_v(Register dst, Register src, Register mask) {
1753+
expand_bits_v(dst, src, mask, /* is_long */ true);
1754+
}
1755+
17171756
void C2_MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2,
17181757
VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE) {
17191758
Label loop;

src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
bool is_latin, Label& DONE);
4141

4242
void compress_bits_v(Register dst, Register src, Register mask, bool is_long);
43+
// Shared implementation behind expand_bits_i_v / expand_bits_l_v; is_long selects the 64-bit form.
void expand_bits_v(Register dst, Register src, Register mask, bool is_long);
4344

4445
public:
4546
// Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
@@ -167,6 +168,9 @@
167168
// compress bits, i.e. j.l.Integer/Long::compress.
168169
void compress_bits_i_v(Register dst, Register src, Register mask);
169170
void compress_bits_l_v(Register dst, Register src, Register mask);
171+
// expand bits, i.e. j.l.Integer/Long::expand.
172+
void expand_bits_i_v(Register dst, Register src, Register mask);
173+
void expand_bits_l_v(Register dst, Register src, Register mask);
170174

171175
void string_equals_v(Register r1, Register r2,
172176
Register result, Register cnt1,

src/hotspot/cpu/riscv/riscv.ad

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -934,6 +934,26 @@ reg_class v11_reg(
934934
V11, V11_H, V11_J, V11_K
935935
);
936936

937+
// Register class containing only vector register v12 (entries V12, V12_H, V12_J, V12_K),
// so that an operand can be constrained to exactly this register.
938+
reg_class v12_reg(
939+
V12, V12_H, V12_J, V12_K
940+
);
941+
942+
// Register class containing only vector register v13 (entries V13, V13_H, V13_J, V13_K),
// so that an operand can be constrained to exactly this register.
943+
reg_class v13_reg(
944+
V13, V13_H, V13_J, V13_K
945+
);
946+
947+
// Register class containing only vector register v14 (entries V14, V14_H, V14_J, V14_K),
// so that an operand can be constrained to exactly this register.
948+
reg_class v14_reg(
949+
V14, V14_H, V14_J, V14_K
950+
);
951+
952+
// Register class containing only vector register v15 (entries V15, V15_H, V15_J, V15_K),
// so that an operand can be constrained to exactly this register.
953+
reg_class v15_reg(
954+
V15, V15_H, V15_J, V15_K
955+
);
956+
937957
// class for condition codes
938958
reg_class reg_flags(RFLAGS);
939959

@@ -1888,6 +1908,7 @@ bool Matcher::match_rule_supported(int opcode) {
18881908
}
18891909
break;
18901910

1911+
case Op_ExpandBits: // fall through
18911912
case Op_CompressBits: // fall through
18921913
guarantee(UseRVV == (MaxVectorSize >= 16), "UseRVV and MaxVectorSize not matched");
18931914
case Op_StrCompressedCopy: // fall through
@@ -3508,6 +3529,46 @@ operand vReg_V11()
35083529
interface(REG_INTER);
35093530
%}
35103531

3532+
// Vector register operand whose allocation is restricted to register class v12_reg,
// for instructions that need this specific register (e.g. as a fixed TEMP).
operand vReg_V12()
3533+
%{
3534+
constraint(ALLOC_IN_RC(v12_reg));
3535+
match(VecA);
3536+
match(vReg);
3537+
op_cost(0);
3538+
format %{ %}
3539+
interface(REG_INTER);
3540+
%}
3541+
3542+
// Vector register operand whose allocation is restricted to register class v13_reg,
// for instructions that need this specific register (e.g. as a fixed TEMP).
operand vReg_V13()
3543+
%{
3544+
constraint(ALLOC_IN_RC(v13_reg));
3545+
match(VecA);
3546+
match(vReg);
3547+
op_cost(0);
3548+
format %{ %}
3549+
interface(REG_INTER);
3550+
%}
3551+
3552+
// Vector register operand whose allocation is restricted to register class v14_reg,
// for instructions that need this specific register (e.g. as a fixed TEMP).
operand vReg_V14()
3553+
%{
3554+
constraint(ALLOC_IN_RC(v14_reg));
3555+
match(VecA);
3556+
match(vReg);
3557+
op_cost(0);
3558+
format %{ %}
3559+
interface(REG_INTER);
3560+
%}
3561+
3562+
// Vector register operand whose allocation is restricted to register class v15_reg,
// for instructions that need this specific register (e.g. as a fixed TEMP).
operand vReg_V15()
3563+
%{
3564+
constraint(ALLOC_IN_RC(v15_reg));
3565+
match(VecA);
3566+
match(vReg);
3567+
op_cost(0);
3568+
format %{ %}
3569+
interface(REG_INTER);
3570+
%}
3571+
35113572
operand vRegMask()
35123573
%{
35133574
constraint(ALLOC_IN_RC(vmask_reg));

src/hotspot/cpu/riscv/riscv_v.ad

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2884,7 +2884,6 @@ instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy,
28842884

28852885
instruct compressBitsI(iRegINoSp dst, iRegIorL2I src, iRegIorL2I mask, vRegMask_V0 v0,
28862886
vReg_V4 v4, vReg_V5 v5, vReg_V8 v8, vReg_V9 v9) %{
2887-
predicate(UseRVV);
28882887
match(Set dst (CompressBits src mask));
28892888
effect(TEMP v0, TEMP v4, TEMP v5, TEMP v8, TEMP v9);
28902889
format %{ "vsetivli x0, 1, e32, m1, tu, mu\t#@compressBitsI\n\t"
@@ -2911,7 +2910,6 @@ instruct compressBitsI(iRegINoSp dst, iRegIorL2I src, iRegIorL2I mask, vRegMask_
29112910
instruct compressBitsL(iRegLNoSp dst, iRegL src, iRegL mask, vRegMask_V0 v0,
29122911
vReg_V4 v4, vReg_V5 v5, vReg_V6 v6, vReg_V7 v7,
29132912
vReg_V8 v8, vReg_V9 v9, vReg_V10 v10, vReg_V11 v11) %{
2914-
predicate(UseRVV);
29152913
match(Set dst (CompressBits src mask));
29162914
effect(TEMP v0, TEMP v4, TEMP v5, TEMP v6, TEMP v7, TEMP v8, TEMP v9, TEMP v10, TEMP v11);
29172915
format %{ "vsetivli x0, 1, e64, m1, tu, mu\t#@compressBitsL\n\t"
@@ -2935,6 +2933,63 @@ instruct compressBitsL(iRegLNoSp dst, iRegL src, iRegL mask, vRegMask_V0 v0,
29352933
ins_pipe(pipe_slow);
29362934
%}
29372935

2936+
// ExpandBits of Long & Integer
2937+
2938+
// Matches the ExpandBits node with int operands and emits it through
// C2_MacroAssembler::expand_bits_i_v. The vector operands v0, v4-v5, v8-v9 and
// v12-v13 are not inputs: they are declared TEMP because the emitted sequence
// writes those fixed registers (v0 plus the m2 groups starting at v4, v8, v12).
// NOTE(review): there is no predicate here; availability appears to be decided
// by the Op_ExpandBits case in Matcher::match_rule_supported - confirm.
instruct expandBitsI(iRegINoSp dst, iRegIorL2I src, iRegIorL2I mask, vRegMask_V0 v0,
2939+
vReg_V4 v4, vReg_V5 v5, vReg_V8 v8, vReg_V9 v9, vReg_V12 v12, vReg_V13 v13) %{
2940+
match(Set dst (ExpandBits src mask));
2941+
effect(TEMP v0, TEMP v4, TEMP v5, TEMP v8, TEMP v9, TEMP v12, TEMP v13);
2942+
// The format text mirrors, instruction by instruction, what expand_bits_v emits for is_long == false.
format %{ "vsetivli x0, 1, e32, m1, tu, mu\t#@expandBitsI\n\t"
2943+
"vmv.s.x $v0, $src\n\t"
2944+
"mv t0, 32\n\t"
2945+
"vsetvli x0, t0, e8, m2, tu, mu\n\t"
2946+
"vmv.v.i $v4, 0\n\t"
2947+
"vmerge.vim $v4, $v4, 1, $v0\n\t"
2948+
"vmv.v.i $v12, 0\n\t"
2949+
"vsetivli x0, 1, e32, m1, tu, mu\n\t"
2950+
"vmv.s.x $v0, $mask\n\t"
2951+
"vsetvli x0, t0, e8, m2, tu, mu\n\t"
2952+
"viota.m $v8, $v0\n\t"
2953+
"vrgather.vv $v12, $v4, $v8, $v0.t\n\t"
2954+
"vmseq.vi $v0, $v12, 1\n\t"
2955+
"vsetivli x0, 1, e32, m1, tu, mu\n\t"
2956+
"vmv.x.s $dst, $v0\t#@expandBitsI\n\t"
2957+
%}
2958+
ins_encode %{
2959+
__ expand_bits_i_v(as_Register($dst$$reg), as_Register($src$$reg), as_Register($mask$$reg));
2960+
%}
2961+
ins_pipe(pipe_slow);
2962+
%}
2963+
2964+
// Matches the ExpandBits node with long operands and emits it through
// C2_MacroAssembler::expand_bits_l_v. The vector operands v0 and v4-v15 are not
// inputs: they are declared TEMP because the emitted sequence writes those
// fixed registers (v0 plus the m4 groups starting at v4, v8, v12).
// NOTE(review): there is no predicate here; availability appears to be decided
// by the Op_ExpandBits case in Matcher::match_rule_supported - confirm.
instruct expandBitsL(iRegLNoSp dst, iRegL src, iRegL mask, vRegMask_V0 v0,
2965+
vReg_V4 v4, vReg_V5 v5, vReg_V6 v6, vReg_V7 v7,
2966+
vReg_V8 v8, vReg_V9 v9, vReg_V10 v10, vReg_V11 v11,
2967+
vReg_V12 v12, vReg_V13 v13, vReg_V14 v14, vReg_V15 v15) %{
2968+
match(Set dst (ExpandBits src mask));
2969+
effect(TEMP v0, TEMP v4, TEMP v5, TEMP v6, TEMP v7, TEMP v8, TEMP v9, TEMP v10, TEMP v11,
2970+
TEMP v12, TEMP v13, TEMP v14, TEMP v15);
2971+
// The format text mirrors, instruction by instruction, what expand_bits_v emits for is_long == true.
format %{ "vsetivli x0, 1, e64, m1, tu, mu\t#@expandBitsL\n\t"
2972+
"vmv.s.x $v0, $src\n\t"
2973+
"mv t0, 64\n\t"
2974+
"vsetvli x0, t0, e8, m4, tu, mu\n\t"
2975+
"vmv.v.i $v4, 0\n\t"
2976+
"vmerge.vim $v4, $v4, 1, $v0\n\t"
2977+
"vmv.v.i $v12, 0\n\t"
2978+
"vsetivli x0, 1, e64, m1, tu, mu\n\t"
2979+
"vmv.s.x $v0, $mask\n\t"
2980+
"vsetvli x0, t0, e8, m4, tu, mu\n\t"
2981+
"viota.m $v8, $v0\n\t"
2982+
"vrgather.vv $v12, $v4, $v8, $v0.t\n\t"
2983+
"vmseq.vi $v0, $v12, 1\n\t"
2984+
"vsetivli x0, 1, e64, m1, tu, mu\n\t"
2985+
"vmv.x.s $dst, $v0\t#@expandBitsL\n\t"
2986+
%}
2987+
ins_encode %{
2988+
__ expand_bits_l_v(as_Register($dst$$reg), as_Register($src$$reg), as_Register($mask$$reg));
2989+
%}
2990+
ins_pipe(pipe_slow);
2991+
%}
2992+
29382993
// Vector Load Const
29392994
instruct vloadcon(vReg dst, immI0 src) %{
29402995
match(Set dst (VectorLoadConst src));

test/hotspot/jtreg/compiler/intrinsics/TestBitShuffleOpers.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@
3030
* @requires (((os.arch=="x86" | os.arch=="amd64" | os.arch=="x86_64") &
3131
* (vm.cpu.features ~= ".*bmi2.*" & vm.cpu.features ~= ".*bmi1.*" &
3232
* vm.cpu.features ~= ".*sse2.*")) |
33-
* (os.arch=="aarch64" & vm.cpu.features ~= ".*svebitperm.*"))
33+
* (os.arch=="aarch64" & vm.cpu.features ~= ".*svebitperm.*") |
34+
* (os.arch=="riscv64" & vm.cpu.features ~= ".*v,.*"))
3435
* @library /test/lib /
3536
* @run driver compiler.intrinsics.TestBitShuffleOpers
3637
*/

0 commit comments

Comments
 (0)