Skip to content
Merged
8 changes: 6 additions & 2 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -577,11 +577,15 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def psadbw256
: X86Builtin<
"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
}

let Features = "avx2",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't create yet another block - we have existing avx2 blocks with the same feature/attribute set

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks updated in latest commit

Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def permdf256
: X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
}

let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
Expand Down
10 changes: 10 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4944,6 +4944,16 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return std::make_pair(0, static_cast<int>(LaneOffset + Index));
});

case X86::BI__builtin_ia32_permdf256:
case X86::BI__builtin_ia32_permdi256:
return interp__builtin_ia32_shuffle_generic(
S, OpPC, Call, [](unsigned DstIdx, unsigned Control) {
// permute4x64 operates on 4 64-bit elements
// For element i (0-3), extract bits [2*i+1:2*i] from Control
unsigned Index = (Control >> (2 * DstIdx)) & 0x3;
return std::make_pair(0, static_cast<int>(Index));
});

case X86::BI__builtin_ia32_vpmultishiftqb128:
case X86::BI__builtin_ia32_vpmultishiftqb256:
case X86::BI__builtin_ia32_vpmultishiftqb512:
Expand Down
13 changes: 13 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13122,6 +13122,19 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(R, E);
}

case X86::BI__builtin_ia32_permdf256:
case X86::BI__builtin_ia32_permdi256: {
APValue R;
if (!evalShuffleGeneric(Info, E, R, [](unsigned DstIdx, unsigned Control) {
// permute4x64 operates on 4 64-bit elements
// For element i (0-3), extract bits [2*i+1:2*i] from Control
unsigned Index = (Control >> (2 * DstIdx)) & 0x3;
return std::make_pair(0, static_cast<int>(Index));
}))
return false;
return Success(R, E);
}

case X86::BI__builtin_ia32_vpermilvarps:
case X86::BI__builtin_ia32_vpermilvarps256:
case X86::BI__builtin_ia32_vpermilvarps512: {
Expand Down
22 changes: 22 additions & 0 deletions clang/test/CodeGen/X86/avx2-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -1111,12 +1111,34 @@ __m256i test_mm256_permute4x64_epi64(__m256i a) {
// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <4 x i32> <i32 3, i32 0, i32 2, i32 0>
return _mm256_permute4x64_epi64(a, 35);
}
// Control value 0x00: [0,0,0,0] -> broadcast element 0
TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x00), 10LL, 10LL, 10LL, 10LL));
// Control value 0x1B: [0,1,2,3] -> reverse order [3,2,1,0] = [D,C,B,A]
TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x1B), 40LL, 30LL, 20LL, 10LL));
// Control value 0x39: [1,2,3,0] -> rotate left [B,C,D,A]
TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x39), 20LL, 30LL, 40LL, 10LL));
// Control value 0x12: [2,0,1,0] -> [C,A,B,A]
TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x12), 30LL, 10LL, 20LL, 10LL));
// Control value 0xE4: [3,2,1,0] -> identity [A,B,C,D]
TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0xE4), 10LL, 20LL, 30LL, 40LL));
// Test with negative values
TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(-40LL, -30LL, -20LL, -10LL), 0x1B), -40LL, -30LL, -20LL, -10LL));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't use _mm256_set_epi64x - use brace initialisation


__m256d test_mm256_permute4x64_pd(__m256d a) {
// CHECK-LABEL: test_mm256_permute4x64_pd
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x i32> <i32 1, i32 2, i32 1, i32 0>
return _mm256_permute4x64_pd(a, 25);
}
// Control value 0x00: [0,0,0,0] -> broadcast element 0
TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x00), 1.0, 1.0, 1.0, 1.0));
// Control value 0x1B: [0,1,2,3] -> reverse order [3,2,1,0] = [D,C,B,A]
TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x1B), 4.0, 3.0, 2.0, 1.0));
// Control value 0x39: [1,2,3,0] -> rotate left [B,C,D,A]
TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x39), 2.0, 3.0, 4.0, 1.0));
// Control value 0x12: [2,0,1,0] -> [C,A,B,A]
TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x12), 3.0, 1.0, 2.0, 1.0));
// Control value 0xE4: [3,2,1,0] -> identity [A,B,C,D]
TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0xE4), 1.0, 2.0, 3.0, 4.0));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't use _mm256_set_pd - use brace initialisation


__m256i test_mm256_permutevar8x32_epi32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_permutevar8x32_epi32
Expand Down