
Commit a269bef

Author: Volodymyr Paprotski

8350459: MontgomeryIntegerPolynomialP256 multiply intrinsic with AVX2 on x86_64

Reviewed-by: ascarpino, sviswanathan

1 parent: c029220

File tree: 9 files changed, +744 -119 lines
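As background (summarized from the issue title plus general knowledge of the class, not from the diff below): MontgomeryIntegerPolynomialP256 is understood to keep 256-bit field elements in a redundant radix-2^52 form of five 64-bit words, so vector lanes have headroom to absorb carries during multiply-accumulate; this commit adds an AVX2 flavor of the multiply intrinsic, presumably alongside the existing AVX-512 path, together with the aligned vector-move assembler support it needs. A minimal sketch of that packing, under the 5x52 assumption:

  #include <stdint.h>

  // Hedged illustration (not from this commit): packing a 256-bit value,
  // given as four 64-bit words w[0..3] (little-endian), into five 52-bit
  // limbs, the redundant form assumed above.
  static const int      LIMB_BITS = 52;
  static const uint64_t LIMB_MASK = (1ULL << LIMB_BITS) - 1;

  void pack_5x52(const uint64_t w[4], uint64_t limb[5]) {
    limb[0] =  w[0]                      & LIMB_MASK;  // bits   0..51
    limb[1] = (w[0] >> 52 | w[1] << 12)  & LIMB_MASK;  // bits  52..103
    limb[2] = (w[1] >> 40 | w[2] << 24)  & LIMB_MASK;  // bits 104..155
    limb[3] = (w[2] >> 28 | w[3] << 36)  & LIMB_MASK;  // bits 156..207
    limb[4] =  w[3] >> 16;                             // bits 208..255
  }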

src/hotspot/cpu/x86/assembler_x86.cpp

Lines changed: 45 additions & 0 deletions
@@ -3529,6 +3529,30 @@ void Assembler::vmovdqu(Address dst, XMMRegister src) {
   emit_operand(src, dst, 0);
 }
 
+// Move Aligned 256bit Vector
+void Assembler::vmovdqa(XMMRegister dst, Address src) {
+  assert(UseAVX > 0, "");
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x6F);
+  emit_operand(dst, src, 0);
+}
+
+void Assembler::vmovdqa(Address dst, XMMRegister src) {
+  assert(UseAVX > 0, "");
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  attributes.reset_is_clear_context();
+  // swap src<->dst for encoding
+  assert(src != xnoreg, "sanity");
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x7F);
+  emit_operand(src, dst, 0);
+}
+
 void Assembler::vpmaskmovd(XMMRegister dst, XMMRegister mask, Address src, int vector_len) {
   assert((VM_Version::supports_avx2() && vector_len == AVX_256bit), "");
   InstructionMark im(this);
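The pair above emits the VEX-encoded forms vmovdqa ymm, m256 (opcode 0x6F) and vmovdqa m256, ymm (opcode 0x7F). Unlike vmovdqu, the aligned forms raise #GP on a misaligned address, so an alignment bug surfaces as a crash during testing instead of a silent penalty. A hypothetical usage sketch (register choices are illustrative, not from this commit; __ is the usual #define __ _masm-> stub-generator shorthand):

  // Hypothetical stub fragment: round-trip one 32-byte block.
  // Assumes rsi and rdi hold 32-byte-aligned pointers.
  __ vmovdqa(xmm0, Address(rsi, 0));   // load:  VEX.256.66.0F 6F /r
  __ vmovdqa(Address(rdi, 0), xmm0);   // store: VEX.256.66.0F 7F /r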
@@ -3791,6 +3815,27 @@ void Assembler::evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
   emit_operand(dst, src, 0);
 }
 
+// Move Aligned 512bit Vector
+void Assembler::evmovdqaq(XMMRegister dst, Address src, int vector_len) {
+  // Unmasked instruction
+  evmovdqaq(dst, k0, src, /*merge*/ false, vector_len);
+}
+
+void Assembler::evmovdqaq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  attributes.set_embedded_opmask_register_specifier(mask);
+  attributes.set_is_evex_instruction();
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x6F);
+  emit_operand(dst, src, 0);
+}
+
 void Assembler::evmovntdquq(Address dst, XMMRegister src, int vector_len) {
   // Unmasked instruction
   evmovntdquq(dst, k0, src, /*merge*/ true, vector_len);
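In the masked EVEX form, merge == false leaves the clear-context attribute set, so masked-off destination lanes are zeroed ({z} semantics), while merge == true preserves them. A hedged sketch of a masked, aligned 512-bit load (mask value, registers, and the table name are illustrative only):

  // Hypothetical: load only the low five qword lanes from a 64-byte-aligned
  // constant table; 'mask_tbl' stands in for some address known to the stub.
  __ mov64(rax, 0x1f);                                   // 0b11111: lanes 0..4
  __ kmovql(k1, rax);
  __ evmovdqaq(xmm4, k1, ExternalAddress(mask_tbl), /*merge*/ false,
               Assembler::AVX_512bit, r13 /*rscratch*/);
  // lanes 5..7 of zmm4 are zeroed; lanes 0..4 come from the table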

src/hotspot/cpu/x86/assembler_x86.hpp

Lines changed: 8 additions & 0 deletions
@@ -1758,6 +1758,10 @@ class Assembler : public AbstractAssembler {
   void vmovdqu(XMMRegister dst, Address src);
   void vmovdqu(XMMRegister dst, XMMRegister src);
 
+  // Move Aligned 256bit Vector
+  void vmovdqa(XMMRegister dst, Address src);
+  void vmovdqa(Address dst, XMMRegister src);
+
   // Move Unaligned 512bit Vector
   void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len);
   void evmovdqub(XMMRegister dst, Address src, int vector_len);
@@ -1791,6 +1795,10 @@ class Assembler : public AbstractAssembler {
   void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
   void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
 
+  // Move Aligned 512bit Vector
+  void evmovdqaq(XMMRegister dst, Address src, int vector_len);
+  void evmovdqaq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
+
   // Move lower 64bit to high 64bit in 128bit register
   void movlhps(XMMRegister dst, XMMRegister src);

src/hotspot/cpu/x86/macroAssembler_x86.cpp

Lines changed: 77 additions & 0 deletions
@@ -2720,6 +2720,60 @@ void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
   }
 }
 
+void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src, int vector_len) {
+  if (vector_len == AVX_512bit) {
+    evmovdquq(dst, src, AVX_512bit);
+  } else if (vector_len == AVX_256bit) {
+    vmovdqu(dst, src);
+  } else {
+    movdqu(dst, src);
+  }
+}
+
+void MacroAssembler::vmovdqu(Address dst, XMMRegister src, int vector_len) {
+  if (vector_len == AVX_512bit) {
+    evmovdquq(dst, src, AVX_512bit);
+  } else if (vector_len == AVX_256bit) {
+    vmovdqu(dst, src);
+  } else {
+    movdqu(dst, src);
+  }
+}
+
+void MacroAssembler::vmovdqu(XMMRegister dst, Address src, int vector_len) {
+  if (vector_len == AVX_512bit) {
+    evmovdquq(dst, src, AVX_512bit);
+  } else if (vector_len == AVX_256bit) {
+    vmovdqu(dst, src);
+  } else {
+    movdqu(dst, src);
+  }
+}
+
+void MacroAssembler::vmovdqa(XMMRegister dst, AddressLiteral src, Register rscratch) {
+  assert(rscratch != noreg || always_reachable(src), "missing");
+
+  if (reachable(src)) {
+    vmovdqa(dst, as_Address(src));
+  }
+  else {
+    lea(rscratch, src);
+    vmovdqa(dst, Address(rscratch, 0));
+  }
+}
+
+void MacroAssembler::vmovdqa(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
+  assert(rscratch != noreg || always_reachable(src), "missing");
+
+  if (vector_len == AVX_512bit) {
+    evmovdqaq(dst, src, AVX_512bit, rscratch);
+  } else if (vector_len == AVX_256bit) {
+    vmovdqa(dst, src, rscratch);
+  } else {
+    movdqa(dst, src, rscratch);
+  }
+}
+
 void MacroAssembler::kmov(KRegister dst, Address src) {
   if (VM_Version::supports_avx512bw()) {
     kmovql(dst, src);
@@ -2844,6 +2898,29 @@ void MacroAssembler::evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
   }
 }
 
+void MacroAssembler::evmovdqaq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch) {
+  assert(rscratch != noreg || always_reachable(src), "missing");
+
+  if (reachable(src)) {
+    Assembler::evmovdqaq(dst, mask, as_Address(src), merge, vector_len);
+  } else {
+    lea(rscratch, src);
+    Assembler::evmovdqaq(dst, mask, Address(rscratch, 0), merge, vector_len);
+  }
+}
+
+void MacroAssembler::evmovdqaq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
+  assert(rscratch != noreg || always_reachable(src), "missing");
+
+  if (reachable(src)) {
+    Assembler::evmovdqaq(dst, as_Address(src), vector_len);
+  } else {
+    lea(rscratch, src);
+    Assembler::evmovdqaq(dst, Address(rscratch, 0), vector_len);
+  }
+}
+
+
 void MacroAssembler::movdqa(XMMRegister dst, AddressLiteral src, Register rscratch) {
   assert(rscratch != noreg || always_reachable(src), "missing");

src/hotspot/cpu/x86/macroAssembler_x86.hpp

Lines changed: 10 additions & 0 deletions
@@ -1348,6 +1348,14 @@ class MacroAssembler: public Assembler {
   void vmovdqu(XMMRegister dst, XMMRegister src);
   void vmovdqu(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
   void vmovdqu(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg);
+  void vmovdqu(XMMRegister dst, XMMRegister src, int vector_len);
+  void vmovdqu(XMMRegister dst, Address src, int vector_len);
+  void vmovdqu(Address dst, XMMRegister src, int vector_len);
+
+  // AVX Aligned forms
+  using Assembler::vmovdqa;
+  void vmovdqa(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
+  void vmovdqa(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg);
 
   // AVX512 Unaligned
   void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, bool merge, int vector_len);
@@ -1404,6 +1412,7 @@ class MacroAssembler: public Assembler {
   void evmovdquq(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
   void evmovdquq(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
   void evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg);
+  void evmovdqaq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg);
 
   void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
     if (dst->encoding() != src->encoding() || mask != k0) {
@@ -1413,6 +1422,7 @@ class MacroAssembler: public Assembler {
   void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
   void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
   void evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg);
+  void evmovdqaq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg);
 
   // Move Aligned Double Quadword
   void movdqa(XMMRegister dst, XMMRegister src) { Assembler::movdqa(dst, src); }
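Two details of this header hunk are worth noting. The using Assembler::vmovdqa; directive keeps the base class's Address-based overloads visible: without it, the new AddressLiteral overloads declared in MacroAssembler would hide them under C++ name-hiding rules. And the rscratch = noreg defaults follow the usual reachability contract seen in the .cpp hunks above: a caller may omit the scratch register only when the literal is always RIP-reachable, otherwise the address is materialized via lea first. A hedged usage sketch (the constant's name is hypothetical):

  // Hypothetical: load a 64-byte-aligned constant table entry; r13 is
  // clobbered only if 'p256_consts' is not RIP-reachable from the stub.
  __ evmovdqaq(xmm10, ExternalAddress(p256_consts), Assembler::AVX_512bit, r13);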
