Skip to content

Commit 3afbda0

Browse files
authored
[Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow SSE41 phminposuw intrinsic to be used in constexpr (#163041)
Fix #161336
1 parent aac8a0d commit 3afbda0

File tree

5 files changed

+91
-3
lines changed

5 files changed

+91
-3
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -334,8 +334,8 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
334334
def dpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
335335
def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, "
336336
"_Vector<2,double>, _Constant char)">;
337-
def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
338-
def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;
337+
def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, "
338+
"_Vector<16, char>, _Constant char)">;
339339
}
340340

341341
let Features = "sse4.1",
@@ -358,6 +358,7 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVector
358358

359359
def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
360360
def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
361+
def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;
361362

362363
def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">;
363364
def vec_set_v16qi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, char, _Constant int)">;

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3003,6 +3003,45 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
30033003
return true;
30043004
}
30053005

3006+
/// Bytecode-interpreter evaluation of the phminposuw builtin (one vector
/// operand). Writes the smallest source element, compared as unsigned, into
/// result element 0, the index of that element into result element 1, and
/// zero into every remaining result element.
///
/// \param S    Interpreter state; the source vector is popped from its stack
///             and the destination vector is read from the new stack top.
/// \param OpPC Not referenced in this function body.
/// \param Call The builtin call; must have exactly one argument.
/// \returns true unconditionally.
static bool interp__builtin_ia32_phminposuw(InterpState &S, CodePtr OpPC,
3007+
const CallExpr *Call) {
3008+
assert(Call->getNumArgs() == 1);
3009+
3010+
// The operand is popped, while the destination is only peeked (not removed
// from the stack).
const Pointer &Source = S.Stk.pop<Pointer>();
3011+
const Pointer &Dest = S.Stk.peek<Pointer>();
3012+
3013+
unsigned SourceLen = Source.getNumElems();
3014+
QualType ElemQT = getElemType(Source);
3015+
OptPrimType ElemT = S.getContext().classify(ElemQT);
3016+
unsigned ElemBitWidth = S.getASTContext().getTypeSize(ElemQT);
3017+
3018+
// Signedness of the result's element type; used below when constructing the
// APSInt values for the index and for the zero padding.
bool DestUnsigned = Call->getCallReturnType(S.getASTContext())
3019+
->castAs<VectorType>()
3020+
->getElementType()
3021+
->isUnsignedIntegerOrEnumerationType();
3022+
3023+
// ElemT is dereferenced without a check, so the element type is assumed to
// classify as a primitive integer type.
INT_TYPE_SWITCH_NO_BOOL(*ElemT, {
3024+
// Element 0 is the initial candidate for the minimum.
APSInt MinIndex(ElemBitWidth, DestUnsigned);
3025+
APSInt MinVal = Source.elem<T>(0).toAPSInt();
3026+
3027+
for (unsigned I = 1; I != SourceLen; ++I) {
3028+
APSInt Val = Source.elem<T>(I).toAPSInt();
3029+
// Unsigned comparison regardless of T's signedness. The test is strict,
// so on ties the lowest index is kept.
if (MinVal.ugt(Val)) {
3030+
MinVal = Val;
3031+
MinIndex = I;
3032+
}
3033+
}
3034+
3035+
// Result layout: [min value, index of min, 0, 0, ...].
Dest.elem<T>(0) = static_cast<T>(MinVal);
3036+
Dest.elem<T>(1) = static_cast<T>(MinIndex);
3037+
for (unsigned I = 2; I != SourceLen; ++I) {
3038+
Dest.elem<T>(I) = static_cast<T>(APSInt(ElemBitWidth, DestUnsigned));
3039+
}
3040+
});
3041+
// Every destination element has been written above; mark them initialized.
Dest.initializeAllElements();
3042+
return true;
3043+
}
3044+
30063045
static bool interp__builtin_ia32_pternlog(InterpState &S, CodePtr OpPC,
30073046
const CallExpr *Call, bool MaskZ) {
30083047
assert(Call->getNumArgs() == 5);
@@ -4087,6 +4126,9 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
40874126
S, OpPC, Call,
40884127
[](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
40894128

4129+
case X86::BI__builtin_ia32_phminposuw128:
4130+
return interp__builtin_ia32_phminposuw(S, OpPC, Call);
4131+
40904132
case X86::BI__builtin_ia32_pternlogd128_mask:
40914133
case X86::BI__builtin_ia32_pternlogd256_mask:
40924134
case X86::BI__builtin_ia32_pternlogd512_mask:

clang/lib/AST/ExprConstant.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12353,6 +12353,40 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1235312353
return Success(R, E);
1235412354
}
1235512355

12356+
case X86::BI__builtin_ia32_phminposuw128: {
12357+
APValue Source;
12358+
if (!Evaluate(Source, Info, E->getArg(0)))
12359+
return false;
12360+
unsigned SourceLen = Source.getVectorLength();
12361+
const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>();
12362+
QualType ElemQT = VT->getElementType();
12363+
unsigned ElemBitWidth = Info.Ctx.getTypeSize(ElemQT);
12364+
12365+
APInt MinIndex(ElemBitWidth, 0);
12366+
APInt MinVal = Source.getVectorElt(0).getInt();
12367+
for (unsigned I = 1; I != SourceLen; ++I) {
12368+
APInt Val = Source.getVectorElt(I).getInt();
12369+
if (MinVal.ugt(Val)) {
12370+
MinVal = Val;
12371+
MinIndex = I;
12372+
}
12373+
}
12374+
12375+
bool ResultUnsigned = E->getCallReturnType(Info.Ctx)
12376+
->castAs<VectorType>()
12377+
->getElementType()
12378+
->isUnsignedIntegerOrEnumerationType();
12379+
12380+
SmallVector<APValue, 8> Result;
12381+
Result.reserve(SourceLen);
12382+
Result.emplace_back(APSInt(MinVal, ResultUnsigned));
12383+
Result.emplace_back(APSInt(MinIndex, ResultUnsigned));
12384+
for (unsigned I = 0; I != SourceLen - 2; ++I) {
12385+
Result.emplace_back(APSInt(APInt(ElemBitWidth, 0), ResultUnsigned));
12386+
}
12387+
return Success(APValue(Result.data(), Result.size()), E);
12388+
}
12389+
1235612390
case X86::BI__builtin_ia32_pternlogd128_mask:
1235712391
case X86::BI__builtin_ia32_pternlogd256_mask:
1235812392
case X86::BI__builtin_ia32_pternlogd512_mask:

clang/lib/Headers/smmintrin.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1524,7 +1524,8 @@ _mm_packus_epi32(__m128i __V1, __m128i __V2) {
15241524
/// \returns A 128-bit value where bits [15:0] contain the minimum value found
15251525
/// in parameter \a __V, bits [18:16] contain the index of the minimum value
15261526
/// and the remaining bits are set to 0.
1527-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_minpos_epu16(__m128i __V) {
1527+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
1528+
_mm_minpos_epu16(__m128i __V) {
15281529
return (__m128i)__builtin_ia32_phminposuw128((__v8hi)__V);
15291530
}
15301531

clang/test/CodeGen/X86/sse41-builtins.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,16 @@ __m128i test_mm_minpos_epu16(__m128i x) {
376376
// CHECK: call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %{{.*}})
377377
return _mm_minpos_epu16(x);
378378
}
379+
TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){0,0,0,0, 0,0,0,0}), 0,0,0,0, 0,0,0,0));
380+
TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){1,0,0,0, 0,0,0,0}), 0,1,0,0, 0,0,0,0));
381+
TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){65535,65535,65535,65535,65535,65535,65535,65535}), 65535,0,0,0, 0,0,0,0));
382+
TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){9,8,7,6,5,4,3,2}), 2,7,0,0, 0,0,0,0));
383+
TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){5,5,5,5,5,5,5,5}), 5,0,0,0, 0,0,0,0));
384+
TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){5,7,9,4,10,4,11,12}), 4,3,0,0, 0,0,0,0));
385+
TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){6,0,0,0,0,0,0,0}), 0,1,0,0, 0,0,0,0));
386+
TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){1000,2000,3000,4000,5000,6000,7000,1}), 1,7,0,0, 0,0,0,0));
387+
TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){1234,5678,42,9999,65535,0,4242,42}), 0,5,0,0, 0,0,0,0));
388+
TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){400,500,12,600,12,700,800,900}), 12,2,0,0, 0,0,0,0));
379389

380390
__m128i test_mm_mpsadbw_epu8(__m128i x, __m128i y) {
381391
// CHECK-LABEL: test_mm_mpsadbw_epu8

0 commit comments

Comments
 (0)