@@ -257,6 +257,28 @@ source %{
257257 return false;
258258 }
259259 break;
260+ case Op_SelectFromTwoVector:
261+ // The "tbl" instruction for two vector table is supported only in Neon and SVE2. Return
262+ // false if vector length > 16B but supported SVE version < 2.
263+ // For vector length of 16B, generate SVE2 "tbl" instruction if SVE2 is supported, else
264+ // generate Neon "tbl" instruction to select from two vectors.
265+ // This operation is disabled for doubles and longs on machines with SVE < 2 and instead
266+ // the default VectorRearrange + VectorBlend is generated because the performance of the default
267+ // implementation was better than or equal to the implementation for SelectFromTwoVector.
268+ if (UseSVE < 2 && (type2aelembytes(bt) == 8 || length_in_bytes > 16)) {
269+ return false;
270+ }
271+
272+ // Because the SVE2 "tbl" instruction is unpredicated and partial operations cannot be generated
273+ // using masks, we disable this operation on machines where length_in_bytes < MaxVectorSize
274+ // on that machine with the only exception of 8B vector length. This is because at the time of
275+ // writing this, there is no SVE2 machine available with length_in_bytes > 8 and
276+ // length_in_bytes < MaxVectorSize to test this operation on (for example - there isn't an
277+ // SVE2 machine available with MaxVectorSize = 32 to test a case with length_in_bytes = 16).
278+ if (UseSVE == 2 && length_in_bytes > 8 && length_in_bytes < MaxVectorSize) {
279+ return false;
280+ }
281+ break;
260282 default:
261283 break;
262284 }
@@ -7172,3 +7194,71 @@ instruct vexpandBits(vReg dst, vReg src1, vReg src2) %{
71727194 %}
71737195 ins_pipe(pipe_slow);
71747196%}
7197+
7198+ // ------------------------------------- SelectFromTwoVector ------------------------------------
7199+ // The Neon and SVE2 "tbl" instructions for a two-vector table lookup require the two source
7200+ // vectors to be held in consecutive vector registers. The match rules for SelectFromTwoVector
7201+ // therefore reserve two consecutive vector registers for src1 and src2.
7202+ // Four combinations of vector registers for vselect_from_two_vectors are chosen at random
7203+ // (two from volatile and two from non-volatile set) which gives more freedom to the register
7204+ // allocator to choose the best pair of source registers at that point.
7205+
// Match rule for SelectFromTwoVector in which src1 and src2 use the vReg_V10 and vReg_V11
// operand types (presumably pinning them to v10/v11 - operand definitions are outside this
// chunk, confirm there), while dst, index and tmp use the general vReg operand.
//
// Operands:
//   dst   - result vector; declared TEMP_DEF in the effect list.
//   src1  - first source (table) vector.
//   src2  - second source (table) vector.
//   index - vector of selection indexes (first input of the Binary node in the match tree).
//   tmp   - scratch vector register; declared TEMP and reported as KILLed in the format string.
7206+ instruct vselect_from_two_vectors_10_11(vReg dst, vReg_V10 src1, vReg_V11 src2,
7207+ vReg index, vReg tmp) %{
// NOTE(review): TEMP_DEF/TEMP are expected to keep dst and tmp from sharing a register with
// the inputs - confirm against the ADLC effect semantics.
7208+ effect(TEMP_DEF dst, TEMP tmp);
7209+ match(Set dst (SelectFromTwoVector (Binary index src1) src2));
7210+ format %{ "vselect_from_two_vectors_10_11 $dst, $src1, $src2, $index\t# KILL $tmp" %}
7211+ ins_encode %{
// The element type and the vector size in bytes are queried from the matched node and passed,
// together with all five registers, to select_from_two_vectors(), which emits the code.
7212+ BasicType bt = Matcher::vector_element_basic_type(this);
7213+ uint length_in_bytes = Matcher::vector_length_in_bytes(this);
7214+ __ select_from_two_vectors($dst$$FloatRegister, $src1$$FloatRegister,
7215+ $src2$$FloatRegister, $index$$FloatRegister,
7216+ $tmp$$FloatRegister, bt, length_in_bytes);
7217+ %}
7218+ ins_pipe(pipe_slow);
7219+ %}
7220+
// Match rule for SelectFromTwoVector in which src1 and src2 use the vReg_V12 and vReg_V13
// operand types (presumably pinning them to v12/v13 - operand definitions are outside this
// chunk, confirm there), while dst, index and tmp use the general vReg operand.
//
// Operands:
//   dst   - result vector; declared TEMP_DEF in the effect list.
//   src1  - first source (table) vector.
//   src2  - second source (table) vector.
//   index - vector of selection indexes (first input of the Binary node in the match tree).
//   tmp   - scratch vector register; declared TEMP and reported as KILLed in the format string.
7221+ instruct vselect_from_two_vectors_12_13(vReg dst, vReg_V12 src1, vReg_V13 src2,
7222+ vReg index, vReg tmp) %{
// NOTE(review): TEMP_DEF/TEMP are expected to keep dst and tmp from sharing a register with
// the inputs - confirm against the ADLC effect semantics.
7223+ effect(TEMP_DEF dst, TEMP tmp);
7224+ match(Set dst (SelectFromTwoVector (Binary index src1) src2));
7225+ format %{ "vselect_from_two_vectors_12_13 $dst, $src1, $src2, $index\t# KILL $tmp" %}
7226+ ins_encode %{
// The element type and the vector size in bytes are queried from the matched node and passed,
// together with all five registers, to select_from_two_vectors(), which emits the code.
7227+ BasicType bt = Matcher::vector_element_basic_type(this);
7228+ uint length_in_bytes = Matcher::vector_length_in_bytes(this);
7229+ __ select_from_two_vectors($dst$$FloatRegister, $src1$$FloatRegister,
7230+ $src2$$FloatRegister, $index$$FloatRegister,
7231+ $tmp$$FloatRegister, bt, length_in_bytes);
7232+ %}
7233+ ins_pipe(pipe_slow);
7234+ %}
7235+
// Match rule for SelectFromTwoVector in which src1 and src2 use the vReg_V17 and vReg_V18
// operand types (presumably pinning them to v17/v18 - operand definitions are outside this
// chunk, confirm there), while dst, index and tmp use the general vReg operand.
//
// Operands:
//   dst   - result vector; declared TEMP_DEF in the effect list.
//   src1  - first source (table) vector.
//   src2  - second source (table) vector.
//   index - vector of selection indexes (first input of the Binary node in the match tree).
//   tmp   - scratch vector register; declared TEMP and reported as KILLed in the format string.
7236+ instruct vselect_from_two_vectors_17_18(vReg dst, vReg_V17 src1, vReg_V18 src2,
7237+ vReg index, vReg tmp) %{
// NOTE(review): TEMP_DEF/TEMP are expected to keep dst and tmp from sharing a register with
// the inputs - confirm against the ADLC effect semantics.
7238+ effect(TEMP_DEF dst, TEMP tmp);
7239+ match(Set dst (SelectFromTwoVector (Binary index src1) src2));
7240+ format %{ "vselect_from_two_vectors_17_18 $dst, $src1, $src2, $index\t# KILL $tmp" %}
7241+ ins_encode %{
// The element type and the vector size in bytes are queried from the matched node and passed,
// together with all five registers, to select_from_two_vectors(), which emits the code.
7242+ BasicType bt = Matcher::vector_element_basic_type(this);
7243+ uint length_in_bytes = Matcher::vector_length_in_bytes(this);
7244+ __ select_from_two_vectors($dst$$FloatRegister, $src1$$FloatRegister,
7245+ $src2$$FloatRegister, $index$$FloatRegister,
7246+ $tmp$$FloatRegister, bt, length_in_bytes);
7247+ %}
7248+ ins_pipe(pipe_slow);
7249+ %}
7250+
// Match rule for SelectFromTwoVector in which src1 and src2 use the vReg_V23 and vReg_V24
// operand types (presumably pinning them to v23/v24 - operand definitions are outside this
// chunk, confirm there), while dst, index and tmp use the general vReg operand.
//
// Operands:
//   dst   - result vector; declared TEMP_DEF in the effect list.
//   src1  - first source (table) vector.
//   src2  - second source (table) vector.
//   index - vector of selection indexes (first input of the Binary node in the match tree).
//   tmp   - scratch vector register; declared TEMP and reported as KILLed in the format string.
7251+ instruct vselect_from_two_vectors_23_24(vReg dst, vReg_V23 src1, vReg_V24 src2,
7252+ vReg index, vReg tmp) %{
// NOTE(review): TEMP_DEF/TEMP are expected to keep dst and tmp from sharing a register with
// the inputs - confirm against the ADLC effect semantics.
7253+ effect(TEMP_DEF dst, TEMP tmp);
7254+ match(Set dst (SelectFromTwoVector (Binary index src1) src2));
7255+ format %{ "vselect_from_two_vectors_23_24 $dst, $src1, $src2, $index\t# KILL $tmp" %}
7256+ ins_encode %{
// The element type and the vector size in bytes are queried from the matched node and passed,
// together with all five registers, to select_from_two_vectors(), which emits the code.
7257+ BasicType bt = Matcher::vector_element_basic_type(this);
7258+ uint length_in_bytes = Matcher::vector_length_in_bytes(this);
7259+ __ select_from_two_vectors($dst$$FloatRegister, $src1$$FloatRegister,
7260+ $src2$$FloatRegister, $index$$FloatRegister,
7261+ $tmp$$FloatRegister, bt, length_in_bytes);
7262+ %}
7263+ ins_pipe(pipe_slow);
7264+ %}
0 commit comments