From 89e3954737c3648dacbee1ba27b03a57fc02f433 Mon Sep 17 00:00:00 2001 From: ahmed Date: Wed, 3 Dec 2025 10:39:43 +0200 Subject: [PATCH 1/9] feat: Add constexpr support for permdi256 and permdf256 --- clang/include/clang/Basic/BuiltinsX86.td | 5 ++++- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 10 ++++++++++ clang/lib/AST/ExprConstant.cpp | 13 +++++++++++++ clang/lib/Headers/avx2intrin.h | 12 ++++++++---- 4 files changed, 35 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 98cea35beb0ea..d07ded80c2b1b 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -577,8 +577,11 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def psadbw256 : X86Builtin< "_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">; - def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">; +} + +let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 971fce541bb88..3ff5dc3eb5600 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4944,6 +4944,16 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return std::make_pair(0, static_cast<int>(LaneOffset + Index)); }); + case X86::BI__builtin_ia32_permdf256: + case X86::BI__builtin_ia32_permdi256: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned Control) { + // permute4x64 operates on 4 
64-bit elements + // For element i (0-3), extract bits [2*i+1:2*i] from Control + unsigned Index = (Control >> (2 * DstIdx)) & 0x3; + return std::make_pair(0, static_cast<int>(Index)); + }); + case X86::BI__builtin_ia32_vpmultishiftqb128: case X86::BI__builtin_ia32_vpmultishiftqb256: case X86::BI__builtin_ia32_vpmultishiftqb512: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index e5af4cb049ba9..13f27be6df58f 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13122,6 +13122,19 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(R, E); } + case X86::BI__builtin_ia32_permdf256: + case X86::BI__builtin_ia32_permdi256: { + APValue R; + if (!evalShuffleGeneric(Info, E, R, [](unsigned DstIdx, unsigned Control) { + // permute4x64 operates on 4 64-bit elements + // For element i (0-3), extract bits [2*i+1:2*i] from Control + unsigned Index = (Control >> (2 * DstIdx)) & 0x3; + return std::make_pair(0, static_cast<int>(Index)); + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_vpermilvarps: case X86::BI__builtin_ia32_vpermilvarps256: case X86::BI__builtin_ia32_vpermilvarps512: { diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index d3ceb2327ac62..dbc3dd01c3cc7 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -3238,8 +3238,10 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) { /// \a M[1:0] specifies the index in \a a for element 0 of the result, /// \a M[3:2] specifies the index for element 1, and so forth. /// \returns A 256-bit vector of [4 x double] containing the result. 
-#define _mm256_permute4x64_pd(V, M) \ - ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(V), (int)(M))) +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_permute4x64_pd(__m256d __V, const int __M) { + return (__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(__V), (int)(__M)); +} /// Sets the result's 256-bit vector of [8 x float] to copies of elements of /// the 256-bit vector of [8 x float] in \a __a as specified by indexes in @@ -3295,8 +3297,10 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256i __b) { /// \a M[1:0] specifies the index in \a a for element 0 of the result, /// \a M[3:2] specifies the index for element 1, and so forth. /// \returns A 256-bit vector of [4 x i64] containing the result. -#define _mm256_permute4x64_epi64(V, M) \ - ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(V), (int)(M))) +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_permute4x64_epi64(__m256i __V, const int __M) { + return (__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(__V), (int)(__M)); +} /// Sets each half of the 256-bit result either to zero or to one of the /// four possible 128-bit halves of the 256-bit vectors \a V1 and \a V2, From b949e990f95d60b8bc9ae8db559268a684c37f0b Mon Sep 17 00:00:00 2001 From: ahmed Date: Wed, 3 Dec 2025 10:48:49 +0200 Subject: [PATCH 2/9] feat: add tests --- clang/test/CodeGen/X86/avx2-builtins.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index d6facfea8962e..83e7f2a25cadb 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -1111,12 +1111,34 @@ __m256i test_mm256_permute4x64_epi64(__m256i a) { // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <4 x i32> <i32 3, i32 0, i32 2, i32 0> return _mm256_permute4x64_epi64(a, 35); } +// Control value 0x00: [0,0,0,0] -> broadcast element 0 +TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 
20LL, 10LL), 0x00), 10LL, 10LL, 10LL, 10LL)); +// Control value 0x1B: [0,1,2,3] -> reverse order [3,2,1,0] = [D,C,B,A] +TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x1B), 40LL, 30LL, 20LL, 10LL)); +// Control value 0x39: [1,2,3,0] -> rotate left [B,C,D,A] +TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x39), 20LL, 30LL, 40LL, 10LL)); +// Control value 0x12: [2,1,0,1] -> [C,B,A,B] +TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x12), 30LL, 20LL, 10LL, 20LL)); +// Control value 0xE4: [3,2,1,0] -> identity [A,B,C,D] +TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0xE4), 10LL, 20LL, 30LL, 40LL)); +// Test with negative values +TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(-40LL, -30LL, -20LL, -10LL), 0x1B), -40LL, -30LL, -20LL, -10LL)); __m256d test_mm256_permute4x64_pd(__m256d a) { // CHECK-LABEL: test_mm256_permute4x64_pd // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x i32> <i32 1, i32 2, i32 1, i32 0> return _mm256_permute4x64_pd(a, 25); } +// Control value 0x00: [0,0,0,0] -> broadcast element 0 +TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x00), 1.0, 1.0, 1.0, 1.0)); +// Control value 0x1B: [0,1,2,3] -> reverse order [3,2,1,0] = [D,C,B,A] +TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x1B), 4.0, 3.0, 2.0, 1.0)); +// Control value 0x39: [1,2,3,0] -> rotate left [B,C,D,A] +TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x39), 2.0, 3.0, 4.0, 1.0)); +// Control value 0x12: [2,1,0,1] -> [C,B,A,B] +TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x12), 3.0, 2.0, 1.0, 2.0)); +// Control value 0xE4: [3,2,1,0] -> identity [A,B,C,D] +TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 
0xE4), 1.0, 2.0, 3.0, 4.0)); __m256i test_mm256_permutevar8x32_epi32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_permutevar8x32_epi32 From 962ed403e39bb5e3d66812653c9b7453bce53023 Mon Sep 17 00:00:00 2001 From: ahmed Date: Wed, 3 Dec 2025 10:53:34 +0200 Subject: [PATCH 3/9] chore: format files --- clang/include/clang/Basic/BuiltinsX86.td | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index d07ded80c2b1b..23eee6df926a1 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -580,12 +580,13 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">; } -let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { - def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; +let Features = "avx2", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def permdf256 + : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">; } - let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">; def pavgb256 : X86Builtin<"_Vector<32, unsigned char>(_Vector<32, unsigned char>, _Vector<32, unsigned char>)">; From 4ec125ac773efc610424bce2b4cc6887d54f871f Mon Sep 17 00:00:00 2001 From: ahmed Date: Wed, 3 Dec 2025 11:04:31 +0200 Subject: [PATCH 4/9] refactor: Fix tests and revert changes in intrinsics header file --- clang/lib/Headers/avx2intrin.h | 12 ++++-------- clang/test/CodeGen/X86/avx2-builtins.c | 8 ++++---- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git 
a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index dbc3dd01c3cc7..d3ceb2327ac62 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -3238,10 +3238,8 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) { /// \a M[1:0] specifies the index in \a a for element 0 of the result, /// \a M[3:2] specifies the index for element 1, and so forth. /// \returns A 256-bit vector of [4 x double] containing the result. -static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_permute4x64_pd(__m256d __V, const int __M) { - return (__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(__V), (int)(__M)); -} +#define _mm256_permute4x64_pd(V, M) \ + ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(V), (int)(M))) /// Sets the result's 256-bit vector of [8 x float] to copies of elements of /// the 256-bit vector of [8 x float] in \a __a as specified by indexes in @@ -3297,10 +3295,8 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256i __b) { /// \a M[1:0] specifies the index in \a a for element 0 of the result, /// \a M[3:2] specifies the index for element 1, and so forth. /// \returns A 256-bit vector of [4 x i64] containing the result. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_permute4x64_epi64(__m256i __V, const int __M) { - return (__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(__V), (int)(__M)); -} +#define _mm256_permute4x64_epi64(V, M) \ + ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(V), (int)(M))) /// Sets each half of the 256-bit result either to zero or to one of the /// four possible 128-bit halves of the 256-bit vectors \a V1 and \a V2, diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index 83e7f2a25cadb..1f7b2fe7e2d39 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -1117,8 +1117,8 @@ TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x1B), 40LL, 30LL, 20LL, 10LL)); // Control value 0x39: [1,2,3,0] -> rotate left [B,C,D,A] TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x39), 20LL, 30LL, 40LL, 10LL)); -// Control value 0x12: [2,1,0,1] -> [C,B,A,B] -TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x12), 30LL, 20LL, 10LL, 20LL)); +// Control value 0x12: [2,0,1,0] -> [C,A,B,A] +TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x12), 30LL, 10LL, 20LL, 10LL)); // Control value 0xE4: [3,2,1,0] -> identity [A,B,C,D] TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0xE4), 10LL, 20LL, 30LL, 40LL)); // Test with negative values @@ -1135,8 +1135,8 @@ TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1. 
TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x1B), 4.0, 3.0, 2.0, 1.0)); // Control value 0x39: [1,2,3,0] -> rotate left [B,C,D,A] TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x39), 2.0, 3.0, 4.0, 1.0)); -// Control value 0x12: [2,1,0,1] -> [C,B,A,B] -TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x12), 3.0, 2.0, 1.0, 2.0)); +// Control value 0x12: [2,0,1,0] -> [C,A,B,A] +TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x12), 3.0, 1.0, 2.0, 1.0)); // Control value 0xE4: [3,2,1,0] -> identity [A,B,C,D] TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0xE4), 1.0, 2.0, 3.0, 4.0)); From 10510e276975a110ca05a6df4555e0aa8968fc21 Mon Sep 17 00:00:00 2001 From: ahmed Date: Wed, 3 Dec 2025 11:04:49 +0200 Subject: [PATCH 5/9] chore: Fix formatting --- clang/lib/Headers/avx2intrin.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index d3ceb2327ac62..4c73a4a59e326 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -3238,7 +3238,7 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) { /// \a M[1:0] specifies the index in \a a for element 0 of the result, /// \a M[3:2] specifies the index for element 1, and so forth. /// \returns A 256-bit vector of [4 x double] containing the result. -#define _mm256_permute4x64_pd(V, M) \ +#define _mm256_permute4x64_pd(V, M) \ ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(V), (int)(M))) /// Sets the result's 256-bit vector of [8 x float] to copies of elements of @@ -3295,7 +3295,7 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256i __b) { /// \a M[1:0] specifies the index in \a a for element 0 of the result, /// \a M[3:2] specifies the index for element 1, and so forth. /// \returns A 256-bit vector of [4 x i64] containing the result. 
-#define _mm256_permute4x64_epi64(V, M) \ +#define _mm256_permute4x64_epi64(V, M) \ ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(V), (int)(M))) /// Sets each half of the 256-bit result either to zero or to one of the From 86b263b620acedbbbde20fc2e02b05105a64a271 Mon Sep 17 00:00:00 2001 From: ahmed Date: Wed, 3 Dec 2025 11:14:12 +0200 Subject: [PATCH 6/9] chore: revert formatting changes --- clang/lib/Headers/avx2intrin.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 4c73a4a59e326..d3ceb2327ac62 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -3238,7 +3238,7 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) { /// \a M[1:0] specifies the index in \a a for element 0 of the result, /// \a M[3:2] specifies the index for element 1, and so forth. /// \returns A 256-bit vector of [4 x double] containing the result. -#define _mm256_permute4x64_pd(V, M) \ +#define _mm256_permute4x64_pd(V, M) \ ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(V), (int)(M))) /// Sets the result's 256-bit vector of [8 x float] to copies of elements of @@ -3295,7 +3295,7 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256i __b) { /// \a M[1:0] specifies the index in \a a for element 0 of the result, /// \a M[3:2] specifies the index for element 1, and so forth. /// \returns A 256-bit vector of [4 x i64] containing the result. 
-#define _mm256_permute4x64_epi64(V, M) \ +#define _mm256_permute4x64_epi64(V, M) \ ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(V), (int)(M))) /// Sets each half of the 256-bit result either to zero or to one of the From 358fb5fabe7b5f0937b3a9d7ece80436662ddea6 Mon Sep 17 00:00:00 2001 From: ahmed Date: Wed, 3 Dec 2025 13:35:14 +0200 Subject: [PATCH 7/9] refactor: move to avx2 existing container --- clang/include/clang/Basic/BuiltinsX86.td | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 23eee6df926a1..1dbbe9af98207 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -580,14 +580,9 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">; } -let Features = "avx2", - Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { - def permdf256 - : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; - def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">; -} - let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; + def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">; def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">; def pavgb256 : X86Builtin<"_Vector<32, unsigned char>(_Vector<32, unsigned char>, _Vector<32, unsigned char>)">; def pavgw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">; From 7d93e7ff5de8badecdae149003c400e38c4349a5 Mon Sep 17 00:00:00 2001 From: ahmed Date: Wed, 3 Dec 2025 13:37:18 +0200 Subject: [PATCH 8/9] chore: Update 
formatting --- clang/include/clang/Basic/BuiltinsX86.td | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 1dbbe9af98207..03c82acfda0a8 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -581,8 +581,10 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i } let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { - def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; - def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">; + def permdf256 + : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; + def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long " + "int>, _Constant int)">; def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">; def pavgb256 : X86Builtin<"_Vector<32, unsigned char>(_Vector<32, unsigned char>, _Vector<32, unsigned char>)">; def pavgw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">; From b14e20f5b48d183aabde4ec8345e16ec4925c417 Mon Sep 17 00:00:00 2001 From: ahmed Date: Wed, 3 Dec 2025 21:40:40 +0200 Subject: [PATCH 9/9] refactor: use brace initialization --- clang/test/CodeGen/X86/avx2-builtins.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index 1f7b2fe7e2d39..c9474e94476fc 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -1112,17 +1112,17 @@ __m256i test_mm256_permute4x64_epi64(__m256i a) { return _mm256_permute4x64_epi64(a, 35); } // Control value 0x00: [0,0,0,0] -> broadcast element 0 -TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 
10LL), 0x00), 10LL, 10LL, 10LL, 10LL)); +TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 30LL, 20LL, 10LL}), 0x00), 40LL, 40LL, 40LL, 40LL)); // Control value 0x1B: [0,1,2,3] -> reverse order [3,2,1,0] = [D,C,B,A] -TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x1B), 40LL, 30LL, 20LL, 10LL)); +TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 30LL, 20LL, 10LL}), 0x1B), 10LL, 20LL, 30LL, 40LL)); // Control value 0x39: [1,2,3,0] -> rotate left [B,C,D,A] -TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x39), 20LL, 30LL, 40LL, 10LL)); +TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 30LL, 20LL, 10LL}), 0x39), 30LL, 20LL, 10LL, 40LL)); // Control value 0x12: [2,0,1,0] -> [C,A,B,A] -TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x12), 30LL, 10LL, 20LL, 10LL)); +TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 30LL, 20LL, 10LL}), 0x12), 20LL, 40LL, 30LL, 40LL)); // Control value 0xE4: [3,2,1,0] -> identity [A,B,C,D] -TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0xE4), 10LL, 20LL, 30LL, 40LL)); +TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 30LL, 20LL, 10LL}), 0xE4), 40LL, 30LL, 20LL, 10LL)); // Test with negative values -TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(-40LL, -30LL, -20LL, -10LL), 0x1B), -40LL, -30LL, -20LL, -10LL)); +TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){-40LL, -30LL, -20LL, -10LL}), 0x1B), -10LL, -20LL, -30LL, -40LL)); __m256d test_mm256_permute4x64_pd(__m256d a) { // CHECK-LABEL: test_mm256_permute4x64_pd @@ -1130,15 +1130,15 @@ __m256d test_mm256_permute4x64_pd(__m256d a) { return _mm256_permute4x64_pd(a, 25); } // Control value 0x00: [0,0,0,0] -> broadcast element 0 
-TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x00), 1.0, 1.0, 1.0, 1.0)); +TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 1.0}), 0x00), 4.0, 4.0, 4.0, 4.0)); // Control value 0x1B: [0,1,2,3] -> reverse order [3,2,1,0] = [D,C,B,A] -TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x1B), 4.0, 3.0, 2.0, 1.0)); +TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 1.0}), 0x1B), 1.0, 2.0, 3.0, 4.0)); // Control value 0x39: [1,2,3,0] -> rotate left [B,C,D,A] -TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x39), 2.0, 3.0, 4.0, 1.0)); +TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 1.0}), 0x39), 3.0, 2.0, 1.0, 4.0)); // Control value 0x12: [2,0,1,0] -> [C,A,B,A] -TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x12), 3.0, 1.0, 2.0, 1.0)); +TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 1.0}), 0x12), 2.0, 4.0, 3.0, 4.0)); // Control value 0xE4: [3,2,1,0] -> identity [A,B,C,D] -TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0xE4), 1.0, 2.0, 3.0, 4.0)); +TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 1.0}), 0xE4), 4.0, 3.0, 2.0, 1.0)); __m256i test_mm256_permutevar8x32_epi32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_permutevar8x32_epi32