Skip to content

Commit 6d54982

Browse files
markbhasawut and github-actions[bot]
authored and committed
Automerge: [Headers][X86] Enable constexpr handling for MMX/SSE/AVX/AVX512 PMADDWD/PMADDUBSW intrinsics (#161563)
This PR updates the PMADDWD/PMADDUBSW builtins to support constant-expression evaluation, by adding cases for them to VectorExprEvaluator::VisitCallExpr and by adding an interp__builtin_ia32_pmadd handler to the bytecode interpreter. Closes #155392
2 parents 4c29fe3 + ee19231 commit 6d54982

File tree

15 files changed

+182
-45
lines changed

15 files changed

+182
-45
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -123,13 +123,16 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
123123
def Op#d128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
124124
}
125125

126-
def pmaddubsw128 : X86Builtin<"_Vector<8, short>(_Vector<16, char>, _Vector<16, char>)">;
127126
def pmulhrsw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
128127
def pshufb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
129128
def psignb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
130129
def psignw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
131130
def psignd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
132131
}
132+
133+
let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
134+
def pmaddubsw128 : X86Builtin<"_Vector<8, short>(_Vector<16, char>, _Vector<16, char>)">;
135+
}
133136
}
134137

135138
// AVX
@@ -278,13 +281,14 @@ let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] i
278281
def psllw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
279282
def pslld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
280283
def psllq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
281-
def pmaddwd128 : X86Builtin<"_Vector<4, int>(_Vector<8, short>, _Vector<8, short>)">;
282284
def pslldqi128_byteshift : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant int)">;
283285
def psrldqi128_byteshift : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant int)">;
284286
}
285287

286288
let Features = "sse2",
287289
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
290+
def pmaddwd128 : X86Builtin<"_Vector<4, int>(_Vector<8, short>, _Vector<8, short>)">;
291+
288292
def pmuludq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
289293

290294
def psllwi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">;
@@ -581,8 +585,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
581585
def phsubw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
582586
def phsubd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
583587
def phsubsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
584-
def pmaddubsw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, _Vector<32, char>)">;
585-
def pmaddwd256 : X86Builtin<"_Vector<8, int>(_Vector<16, short>, _Vector<16, short>)">;
586588
def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
587589
def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
588590
def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
@@ -619,6 +621,9 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
619621

620622
def pblendvb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Vector<32, char>)">;
621623

624+
def pmaddubsw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, _Vector<32, char>)">;
625+
def pmaddwd256 : X86Builtin<"_Vector<8, int>(_Vector<16, short>, _Vector<16, short>)">;
626+
622627
def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
623628
def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
624629

@@ -1378,10 +1383,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>
13781383
def subps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int)">;
13791384
}
13801385

1381-
let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1382-
def pmaddubsw512 : X86Builtin<"_Vector<32, short>(_Vector<64, char>, _Vector<64, char>)">;
1383-
def pmaddwd512 : X86Builtin<"_Vector<16, int>(_Vector<32, short>, _Vector<32, short>)">;
1384-
}
13851386

13861387
let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
13871388
def addss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
@@ -1999,6 +2000,8 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512
19992000
}
20002001

20012002
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
2003+
def pmaddubsw512 : X86Builtin<"_Vector<32, short>(_Vector<64, char>, _Vector<64, char>)">;
2004+
def pmaddwd512 : X86Builtin<"_Vector<16, int>(_Vector<32, short>, _Vector<32, short>)">;
20022005
def psllv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
20032006
def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
20042007
def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2549,6 +2549,44 @@ static bool interp__builtin_elementwise_maxmin(InterpState &S, CodePtr OpPC,
25492549
return true;
25502550
}
25512551

2552+
static bool interp__builtin_ia32_pmadd(
2553+
InterpState &S, CodePtr OpPC, const CallExpr *Call,
2554+
llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &,
2555+
const APSInt &)>
2556+
Fn) {
2557+
assert(Call->getArg(0)->getType()->isVectorType() &&
2558+
Call->getArg(1)->getType()->isVectorType());
2559+
const Pointer &RHS = S.Stk.pop<Pointer>();
2560+
const Pointer &LHS = S.Stk.pop<Pointer>();
2561+
const Pointer &Dst = S.Stk.peek<Pointer>();
2562+
2563+
const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
2564+
PrimType ElemT = *S.getContext().classify(VT->getElementType());
2565+
unsigned NumElems = VT->getNumElements();
2566+
const auto *DestVT = Call->getType()->castAs<VectorType>();
2567+
PrimType DestElemT = *S.getContext().classify(DestVT->getElementType());
2568+
bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
2569+
2570+
unsigned DstElem = 0;
2571+
for (unsigned I = 0; I != NumElems; I += 2) {
2572+
APSInt Result;
2573+
INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2574+
APSInt LoLHS = LHS.elem<T>(I).toAPSInt();
2575+
APSInt HiLHS = LHS.elem<T>(I + 1).toAPSInt();
2576+
APSInt LoRHS = RHS.elem<T>(I).toAPSInt();
2577+
APSInt HiRHS = RHS.elem<T>(I + 1).toAPSInt();
2578+
Result = APSInt(Fn(LoLHS, HiLHS, LoRHS, HiRHS), DestUnsigned);
2579+
});
2580+
2581+
INT_TYPE_SWITCH_NO_BOOL(DestElemT,
2582+
{ Dst.elem<T>(DstElem) = static_cast<T>(Result); });
2583+
++DstElem;
2584+
}
2585+
2586+
Dst.initializeAllElements();
2587+
return true;
2588+
}
2589+
25522590
static bool interp__builtin_ia32_pmul(InterpState &S, CodePtr OpPC,
25532591
const CallExpr *Call,
25542592
unsigned BuiltinID) {
@@ -3471,6 +3509,30 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
34713509
return interp__builtin_elementwise_int_binop(S, OpPC, Call,
34723510
llvm::APIntOps::avgCeilU);
34733511

3512+
case clang::X86::BI__builtin_ia32_pmaddubsw128:
3513+
case clang::X86::BI__builtin_ia32_pmaddubsw256:
3514+
case clang::X86::BI__builtin_ia32_pmaddubsw512:
3515+
return interp__builtin_ia32_pmadd(
3516+
S, OpPC, Call,
3517+
[](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
3518+
const APSInt &HiRHS) {
3519+
unsigned BitWidth = 2 * LoLHS.getBitWidth();
3520+
return (LoLHS.zext(BitWidth) * LoRHS.sext(BitWidth))
3521+
.sadd_sat((HiLHS.zext(BitWidth) * HiRHS.sext(BitWidth)));
3522+
});
3523+
3524+
case clang::X86::BI__builtin_ia32_pmaddwd128:
3525+
case clang::X86::BI__builtin_ia32_pmaddwd256:
3526+
case clang::X86::BI__builtin_ia32_pmaddwd512:
3527+
return interp__builtin_ia32_pmadd(
3528+
S, OpPC, Call,
3529+
[](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
3530+
const APSInt &HiRHS) {
3531+
unsigned BitWidth = 2 * LoLHS.getBitWidth();
3532+
return (LoLHS.sext(BitWidth) * LoRHS.sext(BitWidth)) +
3533+
(HiLHS.sext(BitWidth) * HiRHS.sext(BitWidth));
3534+
});
3535+
34743536
case clang::X86::BI__builtin_ia32_pmulhuw128:
34753537
case clang::X86::BI__builtin_ia32_pmulhuw256:
34763538
case clang::X86::BI__builtin_ia32_pmulhuw512:

clang/lib/AST/ExprConstant.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11778,6 +11778,54 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1177811778
case clang::X86::BI__builtin_ia32_pavgw512:
1177911779
return EvaluateBinOpExpr(llvm::APIntOps::avgCeilU);
1178011780

11781+
case clang::X86::BI__builtin_ia32_pmaddubsw128:
11782+
case clang::X86::BI__builtin_ia32_pmaddubsw256:
11783+
case clang::X86::BI__builtin_ia32_pmaddubsw512:
11784+
case clang::X86::BI__builtin_ia32_pmaddwd128:
11785+
case clang::X86::BI__builtin_ia32_pmaddwd256:
11786+
case clang::X86::BI__builtin_ia32_pmaddwd512: {
11787+
APValue SourceLHS, SourceRHS;
11788+
if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
11789+
!EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
11790+
return false;
11791+
11792+
auto *DestTy = E->getType()->castAs<VectorType>();
11793+
QualType DestEltTy = DestTy->getElementType();
11794+
unsigned SourceLen = SourceLHS.getVectorLength();
11795+
bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
11796+
SmallVector<APValue, 4> ResultElements;
11797+
ResultElements.reserve(SourceLen / 2);
11798+
11799+
for (unsigned EltNum = 0; EltNum < SourceLen; EltNum += 2) {
11800+
const APSInt &LoLHS = SourceLHS.getVectorElt(EltNum).getInt();
11801+
const APSInt &HiLHS = SourceLHS.getVectorElt(EltNum + 1).getInt();
11802+
const APSInt &LoRHS = SourceRHS.getVectorElt(EltNum).getInt();
11803+
const APSInt &HiRHS = SourceRHS.getVectorElt(EltNum + 1).getInt();
11804+
unsigned BitWidth = 2 * LoLHS.getBitWidth();
11805+
11806+
switch (E->getBuiltinCallee()) {
11807+
case clang::X86::BI__builtin_ia32_pmaddubsw128:
11808+
case clang::X86::BI__builtin_ia32_pmaddubsw256:
11809+
case clang::X86::BI__builtin_ia32_pmaddubsw512:
11810+
ResultElements.push_back(APValue(
11811+
APSInt((LoLHS.zext(BitWidth) * LoRHS.sext(BitWidth))
11812+
.sadd_sat((HiLHS.zext(BitWidth) * HiRHS.sext(BitWidth))),
11813+
DestUnsigned)));
11814+
break;
11815+
case clang::X86::BI__builtin_ia32_pmaddwd128:
11816+
case clang::X86::BI__builtin_ia32_pmaddwd256:
11817+
case clang::X86::BI__builtin_ia32_pmaddwd512:
11818+
ResultElements.push_back(
11819+
APValue(APSInt((LoLHS.sext(BitWidth) * LoRHS.sext(BitWidth)) +
11820+
(HiLHS.sext(BitWidth) * HiRHS.sext(BitWidth)),
11821+
DestUnsigned)));
11822+
break;
11823+
}
11824+
}
11825+
11826+
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
11827+
}
11828+
1178111829
case clang::X86::BI__builtin_ia32_pmulhuw128:
1178211830
case clang::X86::BI__builtin_ia32_pmulhuw256:
1178311831
case clang::X86::BI__builtin_ia32_pmulhuw512:

clang/lib/Headers/avx2intrin.h

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1035,10 +1035,9 @@ _mm256_hsubs_epi16(__m256i __a, __m256i __b)
10351035
/// \param __b
10361036
/// A 256-bit vector containing one of the source operands.
10371037
/// \returns A 256-bit vector of [16 x i16] containing the result.
1038-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1039-
_mm256_maddubs_epi16(__m256i __a, __m256i __b)
1040-
{
1041-
return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b);
1038+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
1039+
_mm256_maddubs_epi16(__m256i __a, __m256i __b) {
1040+
return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b);
10421041
}
10431042

10441043
/// Multiplies corresponding 16-bit elements of two 256-bit vectors of
@@ -1067,9 +1066,8 @@ _mm256_maddubs_epi16(__m256i __a, __m256i __b)
10671066
/// \param __b
10681067
/// A 256-bit vector of [16 x i16] containing one of the source operands.
10691068
/// \returns A 256-bit vector of [8 x i32] containing the result.
1070-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1071-
_mm256_madd_epi16(__m256i __a, __m256i __b)
1072-
{
1069+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
1070+
_mm256_madd_epi16(__m256i __a, __m256i __b) {
10731071
return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b);
10741072
}
10751073

clang/lib/Headers/avx512bwintrin.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1064,39 +1064,39 @@ _mm512_maskz_mulhi_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
10641064
(__v32hi)_mm512_setzero_si512());
10651065
}
10661066

1067-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1067+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
10681068
_mm512_maddubs_epi16(__m512i __X, __m512i __Y) {
10691069
return (__m512i)__builtin_ia32_pmaddubsw512((__v64qi)__X, (__v64qi)__Y);
10701070
}
10711071

1072-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1072+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
10731073
_mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X,
10741074
__m512i __Y) {
10751075
return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
10761076
(__v32hi)_mm512_maddubs_epi16(__X, __Y),
10771077
(__v32hi)__W);
10781078
}
10791079

1080-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1080+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
10811081
_mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) {
10821082
return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
10831083
(__v32hi)_mm512_maddubs_epi16(__X, __Y),
10841084
(__v32hi)_mm512_setzero_si512());
10851085
}
10861086

1087-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1087+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
10881088
_mm512_madd_epi16(__m512i __A, __m512i __B) {
10891089
return (__m512i)__builtin_ia32_pmaddwd512((__v32hi)__A, (__v32hi)__B);
10901090
}
10911091

1092-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1092+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
10931093
_mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
10941094
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
10951095
(__v16si)_mm512_madd_epi16(__A, __B),
10961096
(__v16si)__W);
10971097
}
10981098

1099-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1099+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
11001100
_mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) {
11011101
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
11021102
(__v16si)_mm512_madd_epi16(__A, __B),

clang/lib/Headers/avx512vlbwintrin.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1295,57 +1295,57 @@ _mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A, __m256i __I,
12951295
(__v16hi)_mm256_setzero_si256());
12961296
}
12971297

1298-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
1298+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
12991299
_mm_mask_maddubs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
13001300
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
13011301
(__v8hi)_mm_maddubs_epi16(__X, __Y),
13021302
(__v8hi)__W);
13031303
}
13041304

1305-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
1305+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
13061306
_mm_maskz_maddubs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) {
13071307
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
13081308
(__v8hi)_mm_maddubs_epi16(__X, __Y),
13091309
(__v8hi)_mm_setzero_si128());
13101310
}
13111311

1312-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1312+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
13131313
_mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X,
13141314
__m256i __Y) {
13151315
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
13161316
(__v16hi)_mm256_maddubs_epi16(__X, __Y),
13171317
(__v16hi)__W);
13181318
}
13191319

1320-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1320+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
13211321
_mm256_maskz_maddubs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) {
13221322
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
13231323
(__v16hi)_mm256_maddubs_epi16(__X, __Y),
13241324
(__v16hi)_mm256_setzero_si256());
13251325
}
13261326

1327-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
1327+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
13281328
_mm_mask_madd_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
13291329
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
13301330
(__v4si)_mm_madd_epi16(__A, __B),
13311331
(__v4si)__W);
13321332
}
13331333

1334-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
1334+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
13351335
_mm_maskz_madd_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
13361336
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
13371337
(__v4si)_mm_madd_epi16(__A, __B),
13381338
(__v4si)_mm_setzero_si128());
13391339
}
13401340

1341-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1341+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
13421342
_mm256_mask_madd_epi16(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
13431343
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
13441344
(__v8si)_mm256_madd_epi16(__A, __B),
13451345
(__v8si)__W);
13461346
}
13471347

1348-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1348+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
13491349
_mm256_maskz_madd_epi16(__mmask8 __U, __m256i __A, __m256i __B) {
13501350
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
13511351
(__v8si)_mm256_madd_epi16(__A, __B),

clang/lib/Headers/emmintrin.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2290,8 +2290,8 @@ _mm_avg_epu16(__m128i __a, __m128i __b) {
22902290
/// A 128-bit signed [8 x i16] vector.
22912291
/// \returns A 128-bit signed [4 x i32] vector containing the sums of products
22922292
/// of both parameters.
2293-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a,
2294-
__m128i __b) {
2293+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
2294+
_mm_madd_epi16(__m128i __a, __m128i __b) {
22952295
return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);
22962296
}
22972297

clang/lib/Headers/mmintrin.h

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -679,11 +679,10 @@ _mm_subs_pu16(__m64 __m1, __m64 __m2) {
679679
/// A 64-bit integer vector of [4 x i16].
680680
/// \returns A 64-bit integer vector of [2 x i32] containing the sums of
681681
/// products of both parameters.
682-
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
683-
_mm_madd_pi16(__m64 __m1, __m64 __m2)
684-
{
685-
return __trunc64(__builtin_ia32_pmaddwd128((__v8hi)__anyext128(__m1),
686-
(__v8hi)__anyext128(__m2)));
682+
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
683+
_mm_madd_pi16(__m64 __m1, __m64 __m2) {
684+
return __trunc64(__builtin_ia32_pmaddwd128((__v8hi)__zext128(__m1),
685+
(__v8hi)__zext128(__m2)));
687686
}
688687

689688
/// Multiplies each 16-bit signed integer element of the first 64-bit

0 commit comments

Comments
 (0)