@@ -951,6 +951,92 @@ address StubGenerator::generate_fp_mask(const char *stub_name, int64_t mask) {
951951 return start;
952952}
953953
954+ address StubGenerator::generate_compress_perm_table (const char *stub_name, int32_t esize) {
955+ __ align (CodeEntryAlignment);
956+ StubCodeMark mark (this , " StubRoutines" , stub_name);
957+ address start = __ pc ();
958+ if (esize == 32 ) {
959+ // Loop to generate 256 x 8 int compression permute index table. A row is
960+ // accessed using 8 bit index computed using vector mask. An entry in
961+ // a row holds either a valid permute index corresponding to set bit position
962+ // or a -1 (default) value.
963+ for (int mask = 0 ; mask < 256 ; mask++) {
964+ int ctr = 0 ;
965+ for (int j = 0 ; j < 8 ; j++) {
966+ if (mask & (1 << j)) {
967+ __ emit_data (j, relocInfo::none);
968+ ctr++;
969+ }
970+ }
971+ for (; ctr < 8 ; ctr++) {
972+ __ emit_data (-1 , relocInfo::none);
973+ }
974+ }
975+ } else {
976+ assert (esize == 64 , " " );
977+ // Loop to generate 16 x 4 long compression permute index table. A row is
978+ // accessed using 4 bit index computed using vector mask. An entry in
979+ // a row holds either a valid permute index pair for a quadword corresponding
980+ // to set bit position or a -1 (default) value.
981+ for (int mask = 0 ; mask < 16 ; mask++) {
982+ int ctr = 0 ;
983+ for (int j = 0 ; j < 4 ; j++) {
984+ if (mask & (1 << j)) {
985+ __ emit_data (2 * j, relocInfo::none);
986+ __ emit_data (2 * j + 1 , relocInfo::none);
987+ ctr++;
988+ }
989+ }
990+ for (; ctr < 4 ; ctr++) {
991+ __ emit_data64 (-1L , relocInfo::none);
992+ }
993+ }
994+ }
995+ return start;
996+ }
997+
998+ address StubGenerator::generate_expand_perm_table (const char *stub_name, int32_t esize) {
999+ __ align (CodeEntryAlignment);
1000+ StubCodeMark mark (this , " StubRoutines" , stub_name);
1001+ address start = __ pc ();
1002+ if (esize == 32 ) {
1003+ // Loop to generate 256 x 8 int expand permute index table. A row is accessed
1004+ // using 8 bit index computed using vector mask. An entry in a row holds either
1005+ // a valid permute index (starting from least significant lane) placed at poisition
1006+ // corresponding to set bit position or a -1 (default) value.
1007+ for (int mask = 0 ; mask < 256 ; mask++) {
1008+ int ctr = 0 ;
1009+ for (int j = 0 ; j < 8 ; j++) {
1010+ if (mask & (1 << j)) {
1011+ __ emit_data (ctr++, relocInfo::none);
1012+ } else {
1013+ __ emit_data (-1 , relocInfo::none);
1014+ }
1015+ }
1016+ }
1017+ } else {
1018+ assert (esize == 64 , " " );
1019+ // Loop to generate 16 x 4 long expand permute index table. A row is accessed
1020+ // using 4 bit index computed using vector mask. An entry in a row holds either
1021+ // a valid doubleword permute index pair representing a quadword index (starting
1022+ // from least significant lane) placed at poisition corresponding to set bit
1023+ // position or a -1 (default) value.
1024+ for (int mask = 0 ; mask < 16 ; mask++) {
1025+ int ctr = 0 ;
1026+ for (int j = 0 ; j < 4 ; j++) {
1027+ if (mask & (1 << j)) {
1028+ __ emit_data (2 * ctr, relocInfo::none);
1029+ __ emit_data (2 * ctr + 1 , relocInfo::none);
1030+ ctr++;
1031+ } else {
1032+ __ emit_data64 (-1L , relocInfo::none);
1033+ }
1034+ }
1035+ }
1036+ }
1037+ return start;
1038+ }
1039+
9541040address StubGenerator::generate_vector_mask (const char *stub_name, int64_t mask) {
9551041 __ align (CodeEntryAlignment);
9561042 StubCodeMark mark (this , " StubRoutines" , stub_name);
@@ -4095,6 +4181,13 @@ void StubGenerator::generate_compiler_stubs() {
40954181 StubRoutines::x86::_vector_reverse_byte_perm_mask_int = generate_vector_reverse_byte_perm_mask_int (" perm_mask_int" );
40964182 StubRoutines::x86::_vector_reverse_byte_perm_mask_short = generate_vector_reverse_byte_perm_mask_short (" perm_mask_short" );
40974183
4184+ if (VM_Version::supports_avx2 () && !VM_Version::supports_avx512vl ()) {
4185+ StubRoutines::x86::_compress_perm_table32 = generate_compress_perm_table (" compress_perm_table32" , 32 );
4186+ StubRoutines::x86::_compress_perm_table64 = generate_compress_perm_table (" compress_perm_table64" , 64 );
4187+ StubRoutines::x86::_expand_perm_table32 = generate_expand_perm_table (" expand_perm_table32" , 32 );
4188+ StubRoutines::x86::_expand_perm_table64 = generate_expand_perm_table (" expand_perm_table64" , 64 );
4189+ }
4190+
40984191 if (VM_Version::supports_avx2 () && !VM_Version::supports_avx512_vpopcntdq ()) {
40994192 // lut implementation influenced by counting 1s algorithm from section 5-1 of Hackers' Delight.
41004193 StubRoutines::x86::_vector_popcount_lut = generate_popcount_avx_lut (" popcount_lut" );
0 commit comments