Skip to content

Commit 676e6fd

Browse files
author
Xiaohong Gong
committed
8367292: VectorAPI: Optimize VectorMask.fromLong/toLong() for SVE
Reviewed-by: epeter, psandoz, haosun, sviswanathan
1 parent bc66d3e commit 676e6fd

File tree

16 files changed

+464
-250
lines changed

16 files changed

+464
-250
lines changed

src/hotspot/cpu/aarch64/aarch64_vector.ad

Lines changed: 53 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -393,6 +393,32 @@ source %{
393393
return false;
394394
}
395395

// Reports whether the mask operation identified by opcode should keep its
// mask in a predicate register.
//   opcode - Op_* constant identifying the mask operation.
//   vt     - type of the mask value; asserted to be a TypeVectMask exactly
//            when UseSVE != 0.
// Returns false when UseSVE == 0, false for Op_VectorMaskToLong and
// Op_VectorLongToMask, and true for every other opcode.
// NOTE(review): the bare "NNN+" lines are diff gutter markers carried over
// from the commit view; they are not part of the function.
396+
bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
397+
// Only SVE supports the predicate feature.
398+
if (UseSVE == 0) {
399+
// On architectures that do not support predicate, masks are stored in
400+
// general vector registers (TypeVect) with sizes ranging from TypeVectA
401+
// to TypeVectX based on the vector size in bytes.
402+
assert(vt->isa_vectmask() == nullptr, "mask type is not matched");
403+
return false;
404+
}
405+
// Past this point UseSVE != 0, so the mask type is expected to be a
// TypeVectMask; the opcode alone decides the answer.
406+
assert(vt->isa_vectmask() != nullptr, "expected TypeVectMask on SVE");
407+
switch (opcode) {
408+
case Op_VectorMaskToLong:
409+
case Op_VectorLongToMask:
410+
// These operations lack native SVE predicate instructions and are
411+
// implemented using general vector instructions instead. Use vector
412+
// registers rather than predicate registers to save the mask for
413+
// better performance.
414+
return false;
415+
default:
416+
// By default, the mask operations are implemented with predicate
417+
// instructions with a predicate input/output.
418+
return true;
419+
}
420+
}
421+
396422
// Assert that the given node is not a variable shift.
397423
bool assert_not_var_shift(const Node* n) {
398424
assert(!n->as_ShiftV()->is_var_shift(), "illegal variable shift");
@@ -6249,31 +6275,44 @@ instruct vmask_tolong_neon(iRegLNoSp dst, vReg src) %{
62496275
ins_pipe(pipe_slow);
62506276
%}
62516277

// Diff hunk for the SVE rules that match VectorMaskToLong.
// Gutter numbers ending in a minus sign mark lines of the removed rule, which
// took the mask in a pReg; gutter numbers ending in a plus sign mark the
// replacement rules, which take the mask in a vReg. Gutters holding two
// numbers are unchanged context lines.
6252-
instruct vmask_tolong_sve(iRegLNoSp dst, pReg src, vReg tmp1, vReg tmp2) %{
6253-
predicate(UseSVE > 0);
// Added rule: selected when UseSVE > 0 and VM_Version::supports_svebitperm()
// is false. It reserves one vector temporary and emits sve_vmask_tolong with
// the vector length of src.
6278+
instruct vmask_tolong_sve(iRegLNoSp dst, vReg src, vReg tmp) %{
6279+
predicate(UseSVE > 0 && !VM_Version::supports_svebitperm());
6280+
match(Set dst (VectorMaskToLong src));
6281+
effect(TEMP tmp);
6282+
format %{ "vmask_tolong_sve $dst, $src\t# KILL $tmp" %}
6283+
ins_encode %{
6284+
// Input "src" is a vector of boolean represented as
6285+
// bytes with 0x00/0x01 as element values.
6286+
__ sve_vmask_tolong($dst$$Register, $src$$FloatRegister,
6287+
$tmp$$FloatRegister, Matcher::vector_length(this, $src));
6288+
%}
6289+
ins_pipe(pipe_slow);
6290+
%}
6291+
// Added rule: selected when VM_Version::supports_svebitperm() is true. It
// reserves two vector temporaries and emits sve2_vmask_tolong with the vector
// length of src. The match/effect/ins_pipe lines are reused context lines.
6292+
instruct vmask_tolong_sve2(iRegLNoSp dst, vReg src, vReg tmp1, vReg tmp2) %{
6293+
predicate(VM_Version::supports_svebitperm());
62546294
match(Set dst (VectorMaskToLong src));
62556295
effect(TEMP tmp1, TEMP tmp2);
6256-
format %{ "vmask_tolong_sve $dst, $src\t# KILL $tmp1, $tmp2" %}
6296+
format %{ "vmask_tolong_sve2 $dst, $src\t# KILL $tmp1, $tmp2" %}
62576297
ins_encode %{
// Removed encoding: passed src as a predicate register together with the
// element basic type and vector length of src.
6258-
__ sve_vmask_tolong($dst$$Register, $src$$PRegister,
6259-
Matcher::vector_element_basic_type(this, $src),
6260-
Matcher::vector_length(this, $src),
6261-
$tmp1$$FloatRegister, $tmp2$$FloatRegister);
6298+
// Input "src" is a vector of boolean represented as
6299+
// bytes with 0x00/0x01 as element values.
6300+
__ sve2_vmask_tolong($dst$$Register, $src$$FloatRegister,
6301+
$tmp1$$FloatRegister, $tmp2$$FloatRegister,
6302+
Matcher::vector_length(this, $src));
62626303
%}
62636304
ins_pipe(pipe_slow);
62646305
%}
62656306

62666307
// fromlong
62676308

// Diff hunk for the rule that matches VectorLongToMask.
// Lines with a minus gutter belong to the removed version, which produced the
// mask in a pReg and reserved two vector temporaries; lines with a plus gutter
// belong to the new version, which produces the mask in a vReg and reserves
// one vector temporary.
6268-
instruct vmask_fromlong(pReg dst, iRegL src, vReg tmp1, vReg tmp2) %{
6309+
instruct vmask_fromlong(vReg dst, iRegL src, vReg tmp) %{
62696310
match(Set dst (VectorLongToMask src));
6270-
effect(TEMP tmp1, TEMP tmp2);
6271-
format %{ "vmask_fromlong $dst, $src\t# vector (sve2). KILL $tmp1, $tmp2" %}
// The new effect list marks dst as TEMP_DEF and keeps a single TEMP operand.
6311+
effect(TEMP_DEF dst, TEMP tmp);
6312+
format %{ "vmask_fromlong $dst, $src\t# vector (sve2). KILL $tmp" %}
62726313
ins_encode %{
// Removed call: predicate destination plus element basic type and vector
// length of this node.
6273-
__ sve_vmask_fromlong($dst$$PRegister, $src$$Register,
6274-
Matcher::vector_element_basic_type(this),
6275-
Matcher::vector_length(this),
6276-
$tmp1$$FloatRegister, $tmp2$$FloatRegister);
// New call: vector-register destination, one temporary, and the vector
// length of this node; the element basic type is no longer passed.
6314+
__ sve_vmask_fromlong($dst$$FloatRegister, $src$$Register,
6315+
$tmp$$FloatRegister, Matcher::vector_length(this));
62776316
%}
62786317
ins_pipe(pipe_slow);
62796318
%}

src/hotspot/cpu/aarch64/aarch64_vector_ad.m4

Lines changed: 53 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -383,6 +383,32 @@ source %{
383383
return false;
384384
}
385385

// Reports whether the mask operation identified by opcode should keep its
// mask in a predicate register.
//   opcode - Op_* constant identifying the mask operation.
//   vt     - type of the mask value; asserted to be a TypeVectMask exactly
//            when UseSVE != 0.
// Returns false when UseSVE == 0, false for Op_VectorMaskToLong and
// Op_VectorLongToMask, and true for every other opcode.
// NOTE(review): the bare "NNN+" lines are diff gutter markers carried over
// from the commit view; they are not part of the function.
386+
bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
387+
// Only SVE supports the predicate feature.
388+
if (UseSVE == 0) {
389+
// On architectures that do not support predicate, masks are stored in
390+
// general vector registers (TypeVect) with sizes ranging from TypeVectA
391+
// to TypeVectX based on the vector size in bytes.
392+
assert(vt->isa_vectmask() == nullptr, "mask type is not matched");
393+
return false;
394+
}
395+
// Past this point UseSVE != 0, so the mask type is expected to be a
// TypeVectMask; the opcode alone decides the answer.
396+
assert(vt->isa_vectmask() != nullptr, "expected TypeVectMask on SVE");
397+
switch (opcode) {
398+
case Op_VectorMaskToLong:
399+
case Op_VectorLongToMask:
400+
// These operations lack native SVE predicate instructions and are
401+
// implemented using general vector instructions instead. Use vector
402+
// registers rather than predicate registers to save the mask for
403+
// better performance.
404+
return false;
405+
default:
406+
// By default, the mask operations are implemented with predicate
407+
// instructions with a predicate input/output.
408+
return true;
409+
}
410+
}
411+
386412
// Assert that the given node is not a variable shift.
387413
bool assert_not_var_shift(const Node* n) {
388414
assert(!n->as_ShiftV()->is_var_shift(), "illegal variable shift");
@@ -4303,31 +4329,44 @@ instruct vmask_tolong_neon(iRegLNoSp dst, vReg src) %{
43034329
ins_pipe(pipe_slow);
43044330
%}
43054331

// Diff hunk for the SVE rules that match VectorMaskToLong.
// Gutter numbers ending in a minus sign mark lines of the removed rule, which
// took the mask in a pReg; gutter numbers ending in a plus sign mark the
// replacement rules, which take the mask in a vReg. Gutters holding two
// numbers are unchanged context lines.
4306-
instruct vmask_tolong_sve(iRegLNoSp dst, pReg src, vReg tmp1, vReg tmp2) %{
4307-
predicate(UseSVE > 0);
// Added rule: selected when UseSVE > 0 and VM_Version::supports_svebitperm()
// is false. It reserves one vector temporary and emits sve_vmask_tolong with
// the vector length of src.
4332+
instruct vmask_tolong_sve(iRegLNoSp dst, vReg src, vReg tmp) %{
4333+
predicate(UseSVE > 0 && !VM_Version::supports_svebitperm());
4334+
match(Set dst (VectorMaskToLong src));
4335+
effect(TEMP tmp);
4336+
format %{ "vmask_tolong_sve $dst, $src\t# KILL $tmp" %}
4337+
ins_encode %{
4338+
// Input "src" is a vector of boolean represented as
4339+
// bytes with 0x00/0x01 as element values.
4340+
__ sve_vmask_tolong($dst$$Register, $src$$FloatRegister,
4341+
$tmp$$FloatRegister, Matcher::vector_length(this, $src));
4342+
%}
4343+
ins_pipe(pipe_slow);
4344+
%}
4345+
// Added rule: selected when VM_Version::supports_svebitperm() is true. It
// reserves two vector temporaries and emits sve2_vmask_tolong with the vector
// length of src. The match/effect/ins_pipe lines are reused context lines.
4346+
instruct vmask_tolong_sve2(iRegLNoSp dst, vReg src, vReg tmp1, vReg tmp2) %{
4347+
predicate(VM_Version::supports_svebitperm());
43084348
match(Set dst (VectorMaskToLong src));
43094349
effect(TEMP tmp1, TEMP tmp2);
4310-
format %{ "vmask_tolong_sve $dst, $src\t# KILL $tmp1, $tmp2" %}
4350+
format %{ "vmask_tolong_sve2 $dst, $src\t# KILL $tmp1, $tmp2" %}
43114351
ins_encode %{
// Removed encoding: passed src as a predicate register together with the
// element basic type and vector length of src.
4312-
__ sve_vmask_tolong($dst$$Register, $src$$PRegister,
4313-
Matcher::vector_element_basic_type(this, $src),
4314-
Matcher::vector_length(this, $src),
4315-
$tmp1$$FloatRegister, $tmp2$$FloatRegister);
4352+
// Input "src" is a vector of boolean represented as
4353+
// bytes with 0x00/0x01 as element values.
4354+
__ sve2_vmask_tolong($dst$$Register, $src$$FloatRegister,
4355+
$tmp1$$FloatRegister, $tmp2$$FloatRegister,
4356+
Matcher::vector_length(this, $src));
43164357
%}
43174358
ins_pipe(pipe_slow);
43184359
%}
43194360

43204361
// fromlong
43214362

// Diff hunk for the rule that matches VectorLongToMask.
// Lines with a minus gutter belong to the removed version, which produced the
// mask in a pReg and reserved two vector temporaries; lines with a plus gutter
// belong to the new version, which produces the mask in a vReg and reserves
// one vector temporary.
4322-
instruct vmask_fromlong(pReg dst, iRegL src, vReg tmp1, vReg tmp2) %{
4363+
instruct vmask_fromlong(vReg dst, iRegL src, vReg tmp) %{
43234364
match(Set dst (VectorLongToMask src));
4324-
effect(TEMP tmp1, TEMP tmp2);
4325-
format %{ "vmask_fromlong $dst, $src\t# vector (sve2). KILL $tmp1, $tmp2" %}
// The new effect list marks dst as TEMP_DEF and keeps a single TEMP operand.
4365+
effect(TEMP_DEF dst, TEMP tmp);
4366+
format %{ "vmask_fromlong $dst, $src\t# vector (sve2). KILL $tmp" %}
43264367
ins_encode %{
// Removed call: predicate destination plus element basic type and vector
// length of this node.
4327-
__ sve_vmask_fromlong($dst$$PRegister, $src$$Register,
4328-
Matcher::vector_element_basic_type(this),
4329-
Matcher::vector_length(this),
4330-
$tmp1$$FloatRegister, $tmp2$$FloatRegister);
// New call: vector-register destination, one temporary, and the vector
// length of this node; the element basic type is no longer passed.
4368+
__ sve_vmask_fromlong($dst$$FloatRegister, $src$$Register,
4369+
$tmp$$FloatRegister, Matcher::vector_length(this));
43314370
%}
43324371
ins_pipe(pipe_slow);
43334372
%}

0 commit comments

Comments
 (0)