Skip to content

Commit e23c817

Browse files
committed
8360179: RISC-V: Only enable BigInteger intrinsics when AvoidUnalignedAccess == false
Backport-of: 34412da52b41e9374168e67e3b6129576c8e4402
1 parent ee45ba9 commit e23c817

File tree

3 files changed

+30
-111
lines changed

3 files changed

+30
-111
lines changed

src/hotspot/cpu/riscv/macroAssembler_riscv.cpp

Lines changed: 2 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -5310,42 +5310,6 @@ void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, R
53105310
add(final_dest_hi, dest_hi, carry);
53115311
}
53125312

5313-
/**
5314-
* Multiply 32 bit by 32 bit first loop.
*
* Emits a loop that multiplies the single 32-bit word x[xstart] by the words
* of y, walking idx downwards, and stores the low 32 bits of each
* (product + carry) into z while kdx walks downwards in step. The high
* 32 bits become the carry for the next iteration. Nothing is emitted into
* z when idx <= 0 on entry. On exit the last carry is left in the carry
* register; this function does not store it. t0 is used as a scratch register.
5315-
*/
5316-
void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart,
5317-
Register y, Register y_idx, Register z,
5318-
Register carry, Register product,
5319-
Register idx, Register kdx) {
5320-
// jlong carry; jint x[], y[], z[];  (elements are loaded/stored as 32-bit words)
5321-
// for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
5322-
// long product = y[idx] * x[xstart] + carry;
5323-
// z[kdx] = (int)product;
5324-
// carry = product >>> 32;
5325-
// }
5326-
// z[xstart] = (int)carry;  <- not emitted here; left to the caller
5327-
5328-
Label L_first_loop, L_first_loop_exit;
5329-
blez(idx, L_first_loop_exit); // skip the whole loop when idx <= 0
5330-
5331-
shadd(t0, xstart, x, t0, LogBytesPerInt); // t0 = &x[xstart]
5332-
lwu(x_xstart, Address(t0, 0)); // loop-invariant multiplicand, loaded once as an unsigned word
5333-
5334-
bind(L_first_loop);
5335-
subiw(idx, idx, 1); // idx is decremented before it is used as the y index
5336-
shadd(t0, idx, y, t0, LogBytesPerInt); // t0 = &y[idx]
5337-
lwu(y_idx, Address(t0, 0));
5338-
mul(product, x_xstart, y_idx);
5339-
add(product, product, carry);
5340-
srli(carry, product, 32); // carry = product >>> 32
5341-
subiw(kdx, kdx, 1); // kdx is decremented before it is used as the z index
5342-
shadd(t0, kdx, z, t0, LogBytesPerInt); // t0 = &z[kdx]
5343-
sw(product, Address(t0, 0)); // z[kdx] = low 32 bits of product
5344-
bgtz(idx, L_first_loop); // keep going while idx > 0
5345-
5346-
bind(L_first_loop_exit);
5347-
}
5348-
53495313
/**
53505314
* Multiply 64 bit by 64 bit first loop.
53515315
*/
@@ -5562,77 +5526,16 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi
55625526
const Register carry = tmp5;
55635527
const Register product = xlen;
55645528
const Register x_xstart = tmp0;
5529+
const Register jdx = tmp1;
55655530

55665531
mv(idx, ylen); // idx = ylen;
55675532
addw(kdx, xlen, ylen); // kdx = xlen+ylen;
55685533
mv(carry, zr); // carry = 0;
55695534

5570-
Label L_multiply_64_x_64_loop, L_done;
5571-
5535+
Label L_done;
55725536
subiw(xstart, xlen, 1);
55735537
bltz(xstart, L_done);
55745538

5575-
const Register jdx = tmp1;
5576-
5577-
if (AvoidUnalignedAccesses) {
5578-
int base_offset = arrayOopDesc::base_offset_in_bytes(T_INT);
5579-
assert((base_offset % (UseCompactObjectHeaders ? 4 :
5580-
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
5581-
5582-
if ((base_offset % 8) == 0) {
5583-
// multiply_64_x_64_loop emits 8-byte load/store to access two elements
5584-
// at a time from int arrays x and y. When base_offset is 8 bytes, these
5585-
// accesses are naturally aligned if both xlen and ylen are even numbers.
5586-
orr(t0, xlen, ylen);
5587-
test_bit(t0, t0, 0);
5588-
beqz(t0, L_multiply_64_x_64_loop);
5589-
}
5590-
5591-
Label L_second_loop_unaligned, L_third_loop, L_third_loop_exit;
5592-
5593-
multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
5594-
shadd(t0, xstart, z, t0, LogBytesPerInt);
5595-
sw(carry, Address(t0, 0));
5596-
5597-
bind(L_second_loop_unaligned);
5598-
mv(carry, zr);
5599-
mv(jdx, ylen);
5600-
subiw(xstart, xstart, 1);
5601-
bltz(xstart, L_done);
5602-
5603-
subi(sp, sp, 2 * wordSize);
5604-
sd(z, Address(sp, 0));
5605-
sd(zr, Address(sp, wordSize));
5606-
shadd(t0, xstart, z, t0, LogBytesPerInt);
5607-
addi(z, t0, 4);
5608-
shadd(t0, xstart, x, t0, LogBytesPerInt);
5609-
lwu(product, Address(t0, 0));
5610-
5611-
blez(jdx, L_third_loop_exit);
5612-
5613-
bind(L_third_loop);
5614-
subiw(jdx, jdx, 1);
5615-
shadd(t0, jdx, y, t0, LogBytesPerInt);
5616-
lwu(t0, Address(t0, 0));
5617-
mul(t1, t0, product);
5618-
add(t0, t1, carry);
5619-
shadd(tmp6, jdx, z, t1, LogBytesPerInt);
5620-
lwu(t1, Address(tmp6, 0));
5621-
add(t0, t0, t1);
5622-
sw(t0, Address(tmp6, 0));
5623-
srli(carry, t0, 32);
5624-
bgtz(jdx, L_third_loop);
5625-
5626-
bind(L_third_loop_exit);
5627-
ld(z, Address(sp, 0));
5628-
addi(sp, sp, 2 * wordSize);
5629-
shadd(t0, xstart, z, t0, LogBytesPerInt);
5630-
sw(carry, Address(t0, 0));
5631-
5632-
j(L_second_loop_unaligned);
5633-
}
5634-
5635-
bind(L_multiply_64_x_64_loop);
56365539
multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
56375540

56385541
Label L_second_loop_aligned;

src/hotspot/cpu/riscv/macroAssembler_riscv.hpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1382,10 +1382,6 @@ class MacroAssembler: public Assembler {
13821382
void adc(Register dst, Register src1, Register src2, Register carry);
13831383
void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
13841384
Register src1, Register src2, Register carry);
1385-
void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart,
1386-
Register y, Register y_idx, Register z,
1387-
Register carry, Register product,
1388-
Register idx, Register kdx);
13891385
void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
13901386
Register y, Register y_idx, Register z,
13911387
Register carry, Register product,

src/hotspot/cpu/riscv/vm_version_riscv.cpp

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -325,20 +325,40 @@ void VM_Version::c2_initialize() {
325325
FLAG_SET_DEFAULT(UseMulAddIntrinsic, true);
326326
}
327327

328-
if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
329-
FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true);
328+
if (!AvoidUnalignedAccesses) {
329+
if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
330+
FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true);
331+
}
332+
} else if (UseMultiplyToLenIntrinsic) {
333+
warning("Intrinsics for BigInteger.multiplyToLen() not available on this CPU.");
334+
FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
330335
}
331336

332-
if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
333-
FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, true);
337+
if (!AvoidUnalignedAccesses) {
338+
if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
339+
FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, true);
340+
}
341+
} else if (UseSquareToLenIntrinsic) {
342+
warning("Intrinsics for BigInteger.squareToLen() not available on this CPU.");
343+
FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
334344
}
335345

336-
if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
337-
FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true);
346+
if (!AvoidUnalignedAccesses) {
347+
if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
348+
FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true);
349+
}
350+
} else if (UseMontgomeryMultiplyIntrinsic) {
351+
warning("Intrinsics for BigInteger.montgomeryMultiply() not available on this CPU.");
352+
FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
338353
}
339354

340-
if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
341-
FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true);
355+
if (!AvoidUnalignedAccesses) {
356+
if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
357+
FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true);
358+
}
359+
} else if (UseMontgomerySquareIntrinsic) {
360+
warning("Intrinsics for BigInteger.montgomerySquare() not available on this CPU.");
361+
FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
342362
}
343363

344364
// Adler32

0 commit comments

Comments
 (0)