Skip to content

Commit 4ed337e

Browse files
rebase
Created using spr 1.3.7
2 parents a656219 + 562d911 commit 4ed337e

File tree

4 files changed

+49
-3
lines changed

4 files changed

+49
-3
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -585,13 +585,14 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
585585
def psadbw256
586586
: X86Builtin<
587587
"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
588-
def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
589588
def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
590-
def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
591589
}
592590

593-
594591
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
592+
def permdf256
593+
: X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
594+
def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long "
595+
"int>, _Constant int)">;
595596
def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
596597
def pavgb256 : X86Builtin<"_Vector<32, unsigned char>(_Vector<32, unsigned char>, _Vector<32, unsigned char>)">;
597598
def pavgw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">;

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5116,6 +5116,16 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
51165116
return std::make_pair(0, static_cast<int>(LaneOffset + Index));
51175117
});
51185118

5119+
case X86::BI__builtin_ia32_permdf256:
5120+
case X86::BI__builtin_ia32_permdi256:
5121+
return interp__builtin_ia32_shuffle_generic(
5122+
S, OpPC, Call, [](unsigned DstIdx, unsigned Control) {
5123+
// permute4x64 operates on 4 64-bit elements
5124+
// For element i (0-3), extract bits [2*i+1:2*i] from Control
5125+
unsigned Index = (Control >> (2 * DstIdx)) & 0x3;
5126+
return std::make_pair(0, static_cast<int>(Index));
5127+
});
5128+
51195129
case X86::BI__builtin_ia32_vpmultishiftqb128:
51205130
case X86::BI__builtin_ia32_vpmultishiftqb256:
51215131
case X86::BI__builtin_ia32_vpmultishiftqb512:

clang/lib/AST/ExprConstant.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13269,6 +13269,19 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1326913269
return Success(R, E);
1327013270
}
1327113271

13272+
case X86::BI__builtin_ia32_permdf256:
13273+
case X86::BI__builtin_ia32_permdi256: {
13274+
APValue R;
13275+
if (!evalShuffleGeneric(Info, E, R, [](unsigned DstIdx, unsigned Control) {
13276+
// permute4x64 operates on 4 64-bit elements
13277+
// For element i (0-3), extract bits [2*i+1:2*i] from Control
13278+
unsigned Index = (Control >> (2 * DstIdx)) & 0x3;
13279+
return std::make_pair(0, static_cast<int>(Index));
13280+
}))
13281+
return false;
13282+
return Success(R, E);
13283+
}
13284+
1327213285
case X86::BI__builtin_ia32_vpermilvarps:
1327313286
case X86::BI__builtin_ia32_vpermilvarps256:
1327413287
case X86::BI__builtin_ia32_vpermilvarps512: {

clang/test/CodeGen/X86/avx2-builtins.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1111,12 +1111,34 @@ __m256i test_mm256_permute4x64_epi64(__m256i a) {
11111111
// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <4 x i32> <i32 3, i32 0, i32 2, i32 0>
11121112
return _mm256_permute4x64_epi64(a, 35);
11131113
}
1114+
// Control value 0x00: [0,0,0,0] -> broadcast element 0
1115+
TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 30LL, 20LL, 10LL}), 0x00), 40LL, 40LL, 40LL, 40LL));
1116+
// Control value 0x1B: [3,2,1,0] -> reverse order [D,C,B,A]
1117+
TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 30LL, 20LL, 10LL}), 0x1B), 10LL, 20LL, 30LL, 40LL));
1118+
// Control value 0x39: [1,2,3,0] -> rotate left [B,C,D,A]
1119+
TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 30LL, 20LL, 10LL}), 0x39), 30LL, 20LL, 10LL, 40LL));
1120+
// Control value 0x12: [2,0,1,0] -> [C,A,B,A]
1121+
TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 30LL, 20LL, 10LL}), 0x12), 20LL, 40LL, 30LL, 40LL));
1122+
// Control value 0xE4: [0,1,2,3] -> identity [A,B,C,D]
1123+
TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 30LL, 20LL, 10LL}), 0xE4), 40LL, 30LL, 20LL, 10LL));
1124+
// Test with negative values
1125+
TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){-40LL, -30LL, -20LL, -10LL}), 0x1B), -10LL, -20LL, -30LL, -40LL));
11141126

11151127
__m256d test_mm256_permute4x64_pd(__m256d a) {
11161128
// CHECK-LABEL: test_mm256_permute4x64_pd
11171129
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x i32> <i32 1, i32 2, i32 1, i32 0>
11181130
return _mm256_permute4x64_pd(a, 25);
11191131
}
1132+
// Control value 0x00: [0,0,0,0] -> broadcast element 0
1133+
TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 1.0}), 0x00), 4.0, 4.0, 4.0, 4.0));
1134+
// Control value 0x1B: [3,2,1,0] -> reverse order [D,C,B,A]
1135+
TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 1.0}), 0x1B), 1.0, 2.0, 3.0, 4.0));
1136+
// Control value 0x39: [1,2,3,0] -> rotate left [B,C,D,A]
1137+
TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 1.0}), 0x39), 3.0, 2.0, 1.0, 4.0));
1138+
// Control value 0x12: [2,0,1,0] -> [C,A,B,A]
1139+
TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 1.0}), 0x12), 2.0, 4.0, 3.0, 4.0));
1140+
// Control value 0xE4: [0,1,2,3] -> identity [A,B,C,D]
1141+
TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 1.0}), 0xE4), 4.0, 3.0, 2.0, 1.0));
11201142

11211143
__m256i test_mm256_permutevar8x32_epi32(__m256i a, __m256i b) {
11221144
// CHECK-LABEL: test_mm256_permutevar8x32_epi32

0 commit comments

Comments
 (0)