@@ -1111,12 +1111,34 @@ __m256i test_mm256_permute4x64_epi64(__m256i a) {
11111111 // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <4 x i32> <i32 3, i32 0, i32 2, i32 0>
11121112 return _mm256_permute4x64_epi64 (a , 35 );
11131113}
1114+ // Selectors are listed lane 0 first (lowest two immediate bits first); input is {A,B,C,D}. Control value 0x00: selectors [0,0,0,0] -> broadcast element 0 [A,A,A,A]
1115+ TEST_CONSTEXPR (match_v4di (_mm256_permute4x64_epi64 (((__m256i )(__v4di ){40LL , 30LL , 20LL , 10LL }), 0x00 ), 40LL , 40LL , 40LL , 40LL ));
1116+ // Control value 0x1B (0b00'01'10'11): selectors [3,2,1,0] -> reversed order [D,C,B,A]
1117+ TEST_CONSTEXPR (match_v4di (_mm256_permute4x64_epi64 (((__m256i )(__v4di ){40LL , 30LL , 20LL , 10LL }), 0x1B ), 10LL , 20LL , 30LL , 40LL ));
1118+ // Control value 0x39 (0b00'11'10'01): selectors [1,2,3,0] -> rotate left [B,C,D,A]
1119+ TEST_CONSTEXPR (match_v4di (_mm256_permute4x64_epi64 (((__m256i )(__v4di ){40LL , 30LL , 20LL , 10LL }), 0x39 ), 30LL , 20LL , 10LL , 40LL ));
1120+ // Control value 0x12 (0b00'01'00'10): selectors [2,0,1,0] -> [C,A,B,A]
1121+ TEST_CONSTEXPR (match_v4di (_mm256_permute4x64_epi64 (((__m256i )(__v4di ){40LL , 30LL , 20LL , 10LL }), 0x12 ), 20LL , 40LL , 30LL , 40LL ));
1122+ // Control value 0xE4 (0b11'10'01'00): selectors [0,1,2,3] -> identity [A,B,C,D]
1123+ TEST_CONSTEXPR (match_v4di (_mm256_permute4x64_epi64 (((__m256i )(__v4di ){40LL , 30LL , 20LL , 10LL }), 0xE4 ), 40LL , 30LL , 20LL , 10LL ));
1124+ // Same reversal (control value 0x1B) applied to negative element values
1125+ TEST_CONSTEXPR (match_v4di (_mm256_permute4x64_epi64 (((__m256i )(__v4di ){-40LL , -30LL , -20LL , -10LL }), 0x1B ), -10LL , -20LL , -30LL , -40LL ));
11141126
11151127__m256d test_mm256_permute4x64_pd (__m256d a ) { // Lowering test: the intrinsic call must produce the shufflevector matched below.
11161128 // CHECK-LABEL: test_mm256_permute4x64_pd
11171129 // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x i32> <i32 1, i32 2, i32 1, i32 0>
11181130 return _mm256_permute4x64_pd (a , 25 ); // 25 == 0x19 == 0b00'01'10'01: 2-bit selectors, lowest bits first, are 1,2,1,0 (the mask matched above).
11191131}
1132+ // Selectors are listed lane 0 first (lowest two immediate bits first); input is {A,B,C,D}. Control value 0x00: selectors [0,0,0,0] -> broadcast element 0 [A,A,A,A]
1133+ TEST_CONSTEXPR (match_m256d (_mm256_permute4x64_pd (((__m256d ){4.0 , 3.0 , 2.0 , 1.0 }), 0x00 ), 4.0 , 4.0 , 4.0 , 4.0 ));
1134+ // Control value 0x1B (0b00'01'10'11): selectors [3,2,1,0] -> reversed order [D,C,B,A]
1135+ TEST_CONSTEXPR (match_m256d (_mm256_permute4x64_pd (((__m256d ){4.0 , 3.0 , 2.0 , 1.0 }), 0x1B ), 1.0 , 2.0 , 3.0 , 4.0 ));
1136+ // Control value 0x39 (0b00'11'10'01): selectors [1,2,3,0] -> rotate left [B,C,D,A]
1137+ TEST_CONSTEXPR (match_m256d (_mm256_permute4x64_pd (((__m256d ){4.0 , 3.0 , 2.0 , 1.0 }), 0x39 ), 3.0 , 2.0 , 1.0 , 4.0 ));
1138+ // Control value 0x12 (0b00'01'00'10): selectors [2,0,1,0] -> [C,A,B,A]
1139+ TEST_CONSTEXPR (match_m256d (_mm256_permute4x64_pd (((__m256d ){4.0 , 3.0 , 2.0 , 1.0 }), 0x12 ), 2.0 , 4.0 , 3.0 , 4.0 ));
1140+ // Control value 0xE4 (0b11'10'01'00): selectors [0,1,2,3] -> identity [A,B,C,D]
1141+ TEST_CONSTEXPR (match_m256d (_mm256_permute4x64_pd (((__m256d ){4.0 , 3.0 , 2.0 , 1.0 }), 0xE4 ), 4.0 , 3.0 , 2.0 , 1.0 ));
11201142
11211143__m256i test_mm256_permutevar8x32_epi32 (__m256i a , __m256i b ) {
11221144 // CHECK-LABEL: test_mm256_permutevar8x32_epi32
0 commit comments