-
Notifications
You must be signed in to change notification settings - Fork 15k
[X86][Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - Allow AVX/AVX512 IFMA madd52 intrinsics to be used in constexpr #161056
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
4b022cd to
bc897b3
Compare
|
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-x86 Author: NagaChaitanya Vellanki (chaitanyav). Changes: Resolves #160498. Patch is 27.25 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/161056.diff 7 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 77e599587edc3..a5247629e255f 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -2101,27 +2101,6 @@ let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
def movdqa64store256_mask : X86Builtin<"void(_Vector<4, long long int *>, _Vector<4, long long int>, unsigned char)">;
}
-let Features = "avx512ifma", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
- def vpmadd52huq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">;
- def vpmadd52luq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">;
-}
-
-let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpmadd52huq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">;
-}
-
-let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpmadd52huq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
-}
-
-let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpmadd52luq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">;
-}
-
-let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpmadd52luq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
-}
-
let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def vcomisd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>, _Constant int, _Constant int)">;
def vcomiss : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>, _Constant int, _Constant int)">;
@@ -3128,6 +3107,27 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in {
def kordi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
}
+let Features = "avx512ifma", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
+ def vpmadd52huq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">;
+ def vpmadd52luq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">;
+}
+
+let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def vpmadd52huq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">;
+}
+
+let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+ def vpmadd52huq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def vpmadd52luq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">;
+}
+
+let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+ def vpmadd52luq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
let Features = "avx512dq", Attributes = [NoThrow, Const] in {
def kortestcqi : X86Builtin<"int(unsigned char, unsigned char)">;
def kortestzqi : X86Builtin<"int(unsigned char, unsigned char)">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 891344d4e6ed0..cf6a739cc5c60 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3564,6 +3564,28 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return F;
});
+ case X86::BI__builtin_ia32_vpmadd52luq128:
+ case X86::BI__builtin_ia32_vpmadd52luq256:
+ case X86::BI__builtin_ia32_vpmadd52luq512:
+ return interp__builtin_elementwise_triop(
+ S, OpPC, Call, [](const APSInt &A, const APSInt &B, const APSInt &C) {
+ APSInt result = A * B + C;
+ APSInt mask(APSInt::getAllOnes(52).zext(64), false);
+ APSInt masked_result = result & mask;
+ return APSInt(masked_result, true); // unsigned result
+ });
+ case X86::BI__builtin_ia32_vpmadd52huq128:
+ case X86::BI__builtin_ia32_vpmadd52huq256:
+ case X86::BI__builtin_ia32_vpmadd52huq512:
+ return interp__builtin_elementwise_triop(
+ S, OpPC, Call, [](const APSInt &A, const APSInt &B, const APSInt &C) {
+ APSInt result = A * B + C;
+ APSInt mask(APSInt::getAllOnes(52).zext(64), false);
+ APSInt shifted_result = result >> 52;
+ APSInt masked_result = shifted_result & mask;
+ return APSInt(masked_result, true); // unsigned result
+ });
+
case X86::BI__builtin_ia32_vpshldd128:
case X86::BI__builtin_ia32_vpshldd256:
case X86::BI__builtin_ia32_vpshldd512:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b706b14945b6d..c9d8a2b01dd74 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -60,10 +60,12 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/LSP/Logging.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/SipHash.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/raw_ostream.h"
+
#include <cstring>
#include <functional>
#include <limits>
@@ -11869,6 +11871,55 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+ case X86::BI__builtin_ia32_vpmadd52luq128:
+ case X86::BI__builtin_ia32_vpmadd52luq256:
+ case X86::BI__builtin_ia32_vpmadd52luq512: {
+ APValue A, B, C;
+ if (!EvaluateAsRValue(Info, E->getArg(0), A) ||
+ !EvaluateAsRValue(Info, E->getArg(1), B) ||
+ !EvaluateAsRValue(Info, E->getArg(2), C))
+ return false;
+
+ unsigned ALen = A.getVectorLength();
+ SmallVector<APValue, 4> ResultElements;
+ ResultElements.reserve(ALen);
+
+ for (unsigned EltNum = 0; EltNum < ALen; EltNum += 1) {
+ APInt AElt = A.getVectorElt(EltNum).getInt();
+ APInt BElt = B.getVectorElt(EltNum).getInt();
+ APInt CElt = C.getVectorElt(EltNum).getInt();
+ APInt ResElt(AElt.zext(128) * BElt.zext(128) + CElt.zext(128));
+ APInt Mask(64, 0x000FFFFFFFFFFFFFULL);
+ ResultElements.push_back(APValue(APSInt(ResElt.trunc(64) & Mask, false)));
+ }
+
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
+ case X86::BI__builtin_ia32_vpmadd52huq128:
+ case X86::BI__builtin_ia32_vpmadd52huq256:
+ case X86::BI__builtin_ia32_vpmadd52huq512: {
+ APValue A, B, C;
+ if (!EvaluateAsRValue(Info, E->getArg(0), A) ||
+ !EvaluateAsRValue(Info, E->getArg(1), B) ||
+ !EvaluateAsRValue(Info, E->getArg(2), C))
+ return false;
+
+ unsigned ALen = A.getVectorLength();
+ SmallVector<APValue, 4> ResultElements;
+ ResultElements.reserve(ALen);
+
+ for (unsigned EltNum = 0; EltNum < ALen; EltNum += 1) {
+ APInt AElt = A.getVectorElt(EltNum).getInt();
+ APInt BElt = B.getVectorElt(EltNum).getInt();
+ APInt CElt = C.getVectorElt(EltNum).getInt();
+ APInt ResElt(AElt.zext(128) * BElt.zext(128) + CElt.zext(128));
+ APInt Mask(64, 0x000FFFFFFFFFFFFFULL);
+ ResultElements.push_back(
+ APValue(APSInt(ResElt.lshr(52).trunc(64) & Mask, false)));
+ }
+
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
case clang::X86::BI__builtin_ia32_vprotbi:
case clang::X86::BI__builtin_ia32_vprotdi:
case clang::X86::BI__builtin_ia32_vprotqi:
diff --git a/clang/lib/Headers/avx512ifmaintrin.h b/clang/lib/Headers/avx512ifmaintrin.h
index f01b322ce7787..6d800f25e5798 100644
--- a/clang/lib/Headers/avx512ifmaintrin.h
+++ b/clang/lib/Headers/avx512ifmaintrin.h
@@ -19,52 +19,55 @@
__attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"), \
__min_vector_width__(512)))
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
-{
- return (__m512i)__builtin_ia32_vpmadd52huq512((__v8di) __X, (__v8di) __Y,
- (__v8di) __Z);
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+#else
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#endif
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_madd52hi_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
+ return (__m512i)__builtin_ia32_vpmadd52huq512((__v8di)__X, (__v8di)__Y,
+ (__v8di)__Z);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
-{
- return (__m512i)__builtin_ia32_selectq_512(__M,
- (__v8di)_mm512_madd52hi_epu64(__W, __X, __Y),
- (__v8di)__W);
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_mask_madd52hi_epu64(__m512i __W, __mmask8 __M, __m512i __X,
+ __m512i __Y) {
+ return (__m512i)__builtin_ia32_selectq_512(
+ __M, (__v8di)_mm512_madd52hi_epu64(__W, __X, __Y), (__v8di)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_madd52hi_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z)
-{
- return (__m512i)__builtin_ia32_selectq_512(__M,
- (__v8di)_mm512_madd52hi_epu64(__X, __Y, __Z),
- (__v8di)_mm512_setzero_si512());
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_maskz_madd52hi_epu64(__mmask8 __M, __m512i __X, __m512i __Y,
+ __m512i __Z) {
+ return (__m512i)__builtin_ia32_selectq_512(
+ __M, (__v8di)_mm512_madd52hi_epu64(__X, __Y, __Z),
+ (__v8di)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_madd52lo_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
-{
- return (__m512i)__builtin_ia32_vpmadd52luq512((__v8di) __X, (__v8di) __Y,
- (__v8di) __Z);
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_madd52lo_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
+ return (__m512i)__builtin_ia32_vpmadd52luq512((__v8di)__X, (__v8di)__Y,
+ (__v8di)__Z);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_madd52lo_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
-{
- return (__m512i)__builtin_ia32_selectq_512(__M,
- (__v8di)_mm512_madd52lo_epu64(__W, __X, __Y),
- (__v8di)__W);
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_mask_madd52lo_epu64(__m512i __W, __mmask8 __M, __m512i __X,
+ __m512i __Y) {
+ return (__m512i)__builtin_ia32_selectq_512(
+ __M, (__v8di)_mm512_madd52lo_epu64(__W, __X, __Y), (__v8di)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_madd52lo_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z)
-{
- return (__m512i)__builtin_ia32_selectq_512(__M,
- (__v8di)_mm512_madd52lo_epu64(__X, __Y, __Z),
- (__v8di)_mm512_setzero_si512());
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_maskz_madd52lo_epu64(__mmask8 __M, __m512i __X, __m512i __Y,
+ __m512i __Z) {
+ return (__m512i)__builtin_ia32_selectq_512(
+ __M, (__v8di)_mm512_madd52lo_epu64(__X, __Y, __Z),
+ (__v8di)_mm512_setzero_si512());
}
#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS_CONSTEXPR
#endif
diff --git a/clang/lib/Headers/avxifmaintrin.h b/clang/lib/Headers/avxifmaintrin.h
index 5c782d2a5b865..1a9aaaf53affa 100644
--- a/clang/lib/Headers/avxifmaintrin.h
+++ b/clang/lib/Headers/avxifmaintrin.h
@@ -22,6 +22,14 @@
__attribute__((__always_inline__, __nodebug__, __target__("avxifma"), \
__min_vector_width__(256)))
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#else
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#endif
+
// must vex-encoding
/// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y
@@ -55,7 +63,7 @@
/// ENDFOR
/// dst[MAX:128] := 0
/// \endcode
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_madd52hi_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
return (__m128i)__builtin_ia32_vpmadd52huq128((__v2di)__X, (__v2di)__Y,
(__v2di)__Z);
@@ -92,7 +100,7 @@ _mm_madd52hi_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
/// ENDFOR
/// dst[MAX:256] := 0
/// \endcode
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_madd52hi_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
return (__m256i)__builtin_ia32_vpmadd52huq256((__v4di)__X, (__v4di)__Y,
(__v4di)__Z);
@@ -129,7 +137,7 @@ _mm256_madd52hi_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
/// ENDFOR
/// dst[MAX:128] := 0
/// \endcode
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_madd52lo_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
return (__m128i)__builtin_ia32_vpmadd52luq128((__v2di)__X, (__v2di)__Y,
(__v2di)__Z);
@@ -166,12 +174,14 @@ _mm_madd52lo_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
/// ENDFOR
/// dst[MAX:256] := 0
/// \endcode
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_madd52lo_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
return (__m256i)__builtin_ia32_vpmadd52luq256((__v4di)__X, (__v4di)__Y,
(__v4di)__Z);
}
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
+#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
#endif // __AVXIFMAINTRIN_H
diff --git a/clang/test/AST/ByteCode/x86-ifma-constexpr.cpp b/clang/test/AST/ByteCode/x86-ifma-constexpr.cpp
new file mode 100644
index 0000000000000..72dde7d27b3ec
--- /dev/null
+++ b/clang/test/AST/ByteCode/x86-ifma-constexpr.cpp
@@ -0,0 +1,36 @@
+// RUN: %clang_cc1 -fexperimental-new-constant-interpreter -std=c++2a -fsyntax-only \
+// RUN: -triple x86_64-unknown-unknown -target-feature +avxifma -ffreestanding \
+// RUN: -verify %s
+
+// Test constexpr evaluation of X86 IFMA intrinsics with the ByteCode interpreter.
+
+typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16)));
+
+// Declare required IFMA builtin functions.
+extern "C" {
+__m128i __builtin_ia32_vpmadd52luq128(__m128i, __m128i, __m128i);
+__m128i __builtin_ia32_vpmadd52huq128(__m128i, __m128i, __m128i);
+}
+
+// Intrinsic wrapper functions.
+static constexpr __inline__ __m128i __attribute__((__always_inline__, __nodebug__, __target__("avxifma")))
+_mm_madd52lo_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
+ return __builtin_ia32_vpmadd52luq128(__X, __Y, __Z);
+}
+
+// Simple test to check if IFMA intrinsics can be used in constexpr context
+constexpr bool test_basic_ifma() {
+ __m128i a = (__m128i){5ULL, 3ULL};
+ __m128i b = (__m128i){7ULL, 4ULL};
+ __m128i c = (__m128i){2ULL, 1ULL};
+
+ // Just test that we can call the intrinsic in constexpr context
+ __m128i result = _mm_madd52lo_epu64(a, b, c);
+ (void)result; // Suppress unused variable warning
+ return true;
+}
+
+// Basic test to verify constexpr evaluation works
+static_assert(test_basic_ifma(), "Basic IFMA constexpr test failed");
+
+// expected-no-diagnostics
\ No newline at end of file
diff --git a/clang/test/Sema/x86-ifma-constexpr.cpp b/clang/test/Sema/x86-ifma-constexpr.cpp
new file mode 100644
index 0000000000000..eb1fca13dfea5
--- /dev/null
+++ b/clang/test/Sema/x86-ifma-constexpr.cpp
@@ -0,0 +1,262 @@
+// RUN: %clang_cc1 -std=c++2a -fsyntax-only -triple x86_64-unknown-unknown \
+// RUN: -target-feature +avxifma -ffreestanding -verify %s
+
+typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16)));
+typedef long long __m256i __attribute__((__vector_size__(32), __aligned__(32)));
+typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));
+
+typedef unsigned long long __v2du __attribute__((__vector_size__(16)));
+typedef unsigned long long __v4du __attribute__((__vector_size__(32)));
+typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
+
+extern "C" {
+__m128i __builtin_ia32_vpmadd52luq128(__m128i, __m128i, __m128i);
+__m128i __builtin_ia32_vpmadd52huq128(__m128i, __m128i, __m128i);
+__m256i __builtin_ia32_vpmadd52luq256(__m256i, __m256i, __m256i);
+__m256i __builtin_ia32_vpmadd52huq256(__m256i, __m256i, __m256i);
+__m512i __builtin_ia32_vpmadd52luq512(__m512i, __m512i, __m512i);
+__m512i __builtin_ia32_vpmadd52huq512(__m512i, __m512i, __m512i);
+}
+static constexpr __inline__ __m128i __attribute__((__always_inline__, __nodebug__, __target__("avxifma")))
+_mm_madd52lo_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
+ return __builtin_ia32_vpmadd52luq128(__X, __Y, __Z);
+}
+
+static constexpr __inline__ __m128i __attribute__((__always_inline__, __nodebug__, __target__("avxifma")))
+_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
+ return __builtin_ia32_vpmadd52huq128(__X, __Y, __Z);
+}
+
+static constexpr __inline__ __m256i __attribute__((__always_inline__, __nodebug__, __target__("avxifma")))
+_mm256_madd52lo_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
+ return __builtin_ia32_vpmadd52luq256(__X, __Y, __Z);
+}
+
+static constexpr __inline__ __m256i __attribute__((__always_inline__, __nodebug__, __target__("avxifma")))
+_mm256_madd52hi_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
+ return __builtin_ia32_vpmadd52huq256(__X, __Y, __Z);
+}
+
+static constexpr __inline__ __m512i __attribute__((__always_inline__, __nodebug__, __target__("avxifma")))
+_mm512_madd52lo_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
+ return __builtin_ia32_vpmadd52luq512(__X, __Y, __Z);
+}
+
+static constexpr __inline__ __m512i __attribute__((__always_inline__, __nodebug__, __target__("avxifma")))
+_mm512_madd52hi_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
+ return __builtin_ia32_vpmadd52huq512(__X, __Y, __Z);
+}
+
+#define TEST_CONSTEXPR(expr) static_assert(expr, "constexpr test failed")
+
+constexpr bool match_v2du(__m128i result, unsigned long long e0, unsigned long long e1) {
+ __v2du v = (__v2du)result;
+ return v[0] == e0 && v[1] == e1;
+}
+
+constexpr bool match_v4du(__m256i result, unsigned long long e0, unsigned long long e1,
+ unsigned long long e2, unsigned long long e3) {
+ __v4du v = (__v4du)result;
+ return v[0] == e0 && v[1] == e1 && v[2] == e2 && v[3] == e3;
+}
+
+constexpr bool match_v8du(__m512i result, unsigned long long e0, unsigned long long e1,
+ unsigned long long e2, unsigned long long e3,
+ unsigned long long e4, unsigned long long e5,
+ unsigned long long e6, unsigned long long ...
[truncated]
|
bc897b3 to
1920f6d
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please take a look at the Intel Intrinsics Guide to get a better understanding of the madd52 instructions.
1920f6d to
14c11d6
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Missing avx512ifmavl-builtins.c coverage
f11c3bd to
5bddd56
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
avx512ifmavlintrin.h ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You should now be able to clean up your tests and get rid of the macOS hacks.
… intrinsics test coverage (llvm#161684) Inspired by test problems encountered on llvm#161056
5bddd56 to
0613d83
Compare
0613d83 to
83f379b
Compare
83f379b to
7692612
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
APInt calculation looks good to me. Please wait for @RKSimon to approve the frontend-specific part.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
a couple of minors
3fb4fb0 to
314265f
Compare
|
@RKSimon i have addressed the comments. Also i had to use |
| #include "builtin_test_helpers.h" | ||
|
|
||
|
|
||
| __attribute__((target("avx512ifma,avx512vl"))) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't understand why this is necessary
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+ /mnt/f/llvm-project/build/bin/FileCheck /mnt/f/llvm-project/clang/test/CodeGen/X86/avxifma-builtins.c
/mnt/f/llvm-project/clang/test/CodeGen/X86/avxifma-builtins.c:18:10: error: always_inline function '_mm_madd52hi_epu64' requires target feature 'avx512ifma', but would be inlined into function 'test_mm_madd52hi_epu64' that is compiled without support for 'avx512ifma'
18 | return _mm_madd52hi_epu64(__X, __Y, __Z);
| ^
1 error generated.
FileCheck error: '<stdin>' is empty.
FileCheck command line: /mnt/f/llvm-project/build/bin/FileCheck /mnt/f/llvm-project/clang/test/CodeGen/X86/avxifma-builtins.c
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The fact that it didn't require this beforehand is very suspicious, but I can't see the mistake right now. @FreddyLeaf @phoebewang any ideas?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I understand why: in clang/lib/Headers/avx512ifmavlintrin.h it was previously a macro, and now it is a static function. That is why we need the __attribute__.
#define _mm_madd52hi_epu64(X, Y, Z) \
((__m128i)__builtin_ia32_vpmadd52huq128((__v2di)(X), (__v2di)(Y), \
(__v2di)(Z)))
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
return (__m128i)__builtin_ia32_vpmadd52huq128((__v2di)__X, (__v2di)__Y,
(__v2di)__Z);
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We cannot change it to static functions directly. I have a workaround that may help here: #162760
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will rebase and resolve this once those changes are merged.
…Allow AVX/AVX512 IFMA madd52 intrinsics to be used in constexpr Resolves llvm#160498
314265f to
1a0a6e4
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
…Allow AVX/AVX512 IFMA madd52 intrinsics to be used in constexpr (llvm#161056) Resolves llvm#160498
Resolves #160498