Skip to content

Conversation

@chaitanyav
Copy link
Contributor

Resolves #160498

@github-actions
Copy link

github-actions bot commented Sep 28, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@chaitanyav chaitanyav force-pushed the issue_160498 branch 2 times, most recently from 4b022cd to bc897b3 Compare September 28, 2025 10:08
@chaitanyav chaitanyav marked this pull request as ready for review September 29, 2025 03:01
@chaitanyav chaitanyav requested review from FreddyLeaf, RKSimon and tbaederr and removed request for RKSimon and tbaederr September 29, 2025 03:01
@llvmbot llvmbot added clang Clang issues not falling into any other category backend:X86 clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:headers Headers provided by Clang, e.g. for intrinsics clang:bytecode Issues for the clang bytecode constexpr interpreter labels Sep 29, 2025
@llvmbot
Copy link
Member

llvmbot commented Sep 29, 2025

@llvm/pr-subscribers-clang

@llvm/pr-subscribers-backend-x86

Author: NagaChaitanya Vellanki (chaitanyav)

Changes

Resolves #160498


Patch is 27.25 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/161056.diff

7 Files Affected:

  • (modified) clang/include/clang/Basic/BuiltinsX86.td (+21-21)
  • (modified) clang/lib/AST/ByteCode/InterpBuiltin.cpp (+22)
  • (modified) clang/lib/AST/ExprConstant.cpp (+51)
  • (modified) clang/lib/Headers/avx512ifmaintrin.h (+37-34)
  • (modified) clang/lib/Headers/avxifmaintrin.h (+14-4)
  • (added) clang/test/AST/ByteCode/x86-ifma-constexpr.cpp (+36)
  • (added) clang/test/Sema/x86-ifma-constexpr.cpp (+262)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 77e599587edc3..a5247629e255f 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -2101,27 +2101,6 @@ let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
   def movdqa64store256_mask : X86Builtin<"void(_Vector<4, long long int *>, _Vector<4, long long int>, unsigned char)">;
 }
 
-let Features = "avx512ifma", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
-  def vpmadd52huq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">;
-  def vpmadd52luq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">;
-}
-
-let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
-  def vpmadd52huq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">;
-}
-
-let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def vpmadd52huq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
-}
-
-let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
-  def vpmadd52luq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">;
-}
-
-let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def vpmadd52luq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
-}
-
 let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
   def vcomisd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>, _Constant int, _Constant int)">;
   def vcomiss : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>, _Constant int, _Constant int)">;
@@ -3128,6 +3107,27 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in {
   def kordi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
 }
 
+let Features = "avx512ifma", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
+  def vpmadd52huq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">;
+  def vpmadd52luq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">;
+}
+
+let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+  def vpmadd52huq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">;
+}
+
+let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+  def vpmadd52huq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+  def vpmadd52luq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">;
+}
+
+let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+  def vpmadd52luq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
 let Features = "avx512dq", Attributes = [NoThrow, Const] in {
   def kortestcqi : X86Builtin<"int(unsigned char, unsigned char)">;
   def kortestzqi : X86Builtin<"int(unsigned char, unsigned char)">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 891344d4e6ed0..cf6a739cc5c60 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3564,6 +3564,28 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
           return F;
         });
 
+  case X86::BI__builtin_ia32_vpmadd52luq128:
+  case X86::BI__builtin_ia32_vpmadd52luq256:
+  case X86::BI__builtin_ia32_vpmadd52luq512:
+    return interp__builtin_elementwise_triop(
+        S, OpPC, Call, [](const APSInt &A, const APSInt &B, const APSInt &C) {
+          APSInt result = A * B + C;
+          APSInt mask(APSInt::getAllOnes(52).zext(64), false);
+          APSInt masked_result = result & mask;
+          return APSInt(masked_result, true); // unsigned result
+        });
+  case X86::BI__builtin_ia32_vpmadd52huq128:
+  case X86::BI__builtin_ia32_vpmadd52huq256:
+  case X86::BI__builtin_ia32_vpmadd52huq512:
+    return interp__builtin_elementwise_triop(
+        S, OpPC, Call, [](const APSInt &A, const APSInt &B, const APSInt &C) {
+          APSInt result = A * B + C;
+          APSInt mask(APSInt::getAllOnes(52).zext(64), false);
+          APSInt shifted_result = result >> 52;
+          APSInt masked_result = shifted_result & mask;
+          return APSInt(masked_result, true); // unsigned result
+        });
+
   case X86::BI__builtin_ia32_vpshldd128:
   case X86::BI__builtin_ia32_vpshldd256:
   case X86::BI__builtin_ia32_vpshldd512:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b706b14945b6d..c9d8a2b01dd74 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -60,10 +60,12 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/LSP/Logging.h"
 #include "llvm/Support/SaveAndRestore.h"
 #include "llvm/Support/SipHash.h"
 #include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/raw_ostream.h"
+
 #include <cstring>
 #include <functional>
 #include <limits>
@@ -11869,6 +11871,55 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
     return Success(APValue(ResultElements.data(), ResultElements.size()), E);
   }
 
+  case X86::BI__builtin_ia32_vpmadd52luq128:
+  case X86::BI__builtin_ia32_vpmadd52luq256:
+  case X86::BI__builtin_ia32_vpmadd52luq512: {
+    APValue A, B, C;
+    if (!EvaluateAsRValue(Info, E->getArg(0), A) ||
+        !EvaluateAsRValue(Info, E->getArg(1), B) ||
+        !EvaluateAsRValue(Info, E->getArg(2), C))
+      return false;
+
+    unsigned ALen = A.getVectorLength();
+    SmallVector<APValue, 4> ResultElements;
+    ResultElements.reserve(ALen);
+
+    for (unsigned EltNum = 0; EltNum < ALen; EltNum += 1) {
+      APInt AElt = A.getVectorElt(EltNum).getInt();
+      APInt BElt = B.getVectorElt(EltNum).getInt();
+      APInt CElt = C.getVectorElt(EltNum).getInt();
+      APInt ResElt(AElt.zext(128) * BElt.zext(128) + CElt.zext(128));
+      APInt Mask(64, 0x000FFFFFFFFFFFFFULL);
+      ResultElements.push_back(APValue(APSInt(ResElt.trunc(64) & Mask, false)));
+    }
+
+    return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+  }
+  case X86::BI__builtin_ia32_vpmadd52huq128:
+  case X86::BI__builtin_ia32_vpmadd52huq256:
+  case X86::BI__builtin_ia32_vpmadd52huq512: {
+    APValue A, B, C;
+    if (!EvaluateAsRValue(Info, E->getArg(0), A) ||
+        !EvaluateAsRValue(Info, E->getArg(1), B) ||
+        !EvaluateAsRValue(Info, E->getArg(2), C))
+      return false;
+
+    unsigned ALen = A.getVectorLength();
+    SmallVector<APValue, 4> ResultElements;
+    ResultElements.reserve(ALen);
+
+    for (unsigned EltNum = 0; EltNum < ALen; EltNum += 1) {
+      APInt AElt = A.getVectorElt(EltNum).getInt();
+      APInt BElt = B.getVectorElt(EltNum).getInt();
+      APInt CElt = C.getVectorElt(EltNum).getInt();
+      APInt ResElt(AElt.zext(128) * BElt.zext(128) + CElt.zext(128));
+      APInt Mask(64, 0x000FFFFFFFFFFFFFULL);
+      ResultElements.push_back(
+          APValue(APSInt(ResElt.lshr(52).trunc(64) & Mask, false)));
+    }
+
+    return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+  }
   case clang::X86::BI__builtin_ia32_vprotbi:
   case clang::X86::BI__builtin_ia32_vprotdi:
   case clang::X86::BI__builtin_ia32_vprotqi:
diff --git a/clang/lib/Headers/avx512ifmaintrin.h b/clang/lib/Headers/avx512ifmaintrin.h
index f01b322ce7787..6d800f25e5798 100644
--- a/clang/lib/Headers/avx512ifmaintrin.h
+++ b/clang/lib/Headers/avx512ifmaintrin.h
@@ -19,52 +19,55 @@
   __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"),     \
                  __min_vector_width__(512)))
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
-{
-  return (__m512i)__builtin_ia32_vpmadd52huq512((__v8di) __X, (__v8di) __Y,
-                                                (__v8di) __Z);
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+#else
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#endif
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_madd52hi_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
+  return (__m512i)__builtin_ia32_vpmadd52huq512((__v8di)__X, (__v8di)__Y,
+                                                (__v8di)__Z);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
-{
-  return (__m512i)__builtin_ia32_selectq_512(__M,
-                                   (__v8di)_mm512_madd52hi_epu64(__W, __X, __Y),
-                                   (__v8di)__W);
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_mask_madd52hi_epu64(__m512i __W, __mmask8 __M, __m512i __X,
+                           __m512i __Y) {
+  return (__m512i)__builtin_ia32_selectq_512(
+      __M, (__v8di)_mm512_madd52hi_epu64(__W, __X, __Y), (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_madd52hi_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z)
-{
-  return (__m512i)__builtin_ia32_selectq_512(__M,
-                                   (__v8di)_mm512_madd52hi_epu64(__X, __Y, __Z),
-                                   (__v8di)_mm512_setzero_si512());
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_maskz_madd52hi_epu64(__mmask8 __M, __m512i __X, __m512i __Y,
+                            __m512i __Z) {
+  return (__m512i)__builtin_ia32_selectq_512(
+      __M, (__v8di)_mm512_madd52hi_epu64(__X, __Y, __Z),
+      (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_madd52lo_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
-{
-  return (__m512i)__builtin_ia32_vpmadd52luq512((__v8di) __X, (__v8di) __Y,
-                                                (__v8di) __Z);
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_madd52lo_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
+  return (__m512i)__builtin_ia32_vpmadd52luq512((__v8di)__X, (__v8di)__Y,
+                                                (__v8di)__Z);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_madd52lo_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
-{
-  return (__m512i)__builtin_ia32_selectq_512(__M,
-                                   (__v8di)_mm512_madd52lo_epu64(__W, __X, __Y),
-                                   (__v8di)__W);
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_mask_madd52lo_epu64(__m512i __W, __mmask8 __M, __m512i __X,
+                           __m512i __Y) {
+  return (__m512i)__builtin_ia32_selectq_512(
+      __M, (__v8di)_mm512_madd52lo_epu64(__W, __X, __Y), (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_madd52lo_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z)
-{
-  return (__m512i)__builtin_ia32_selectq_512(__M,
-                                   (__v8di)_mm512_madd52lo_epu64(__X, __Y, __Z),
-                                   (__v8di)_mm512_setzero_si512());
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_maskz_madd52lo_epu64(__mmask8 __M, __m512i __X, __m512i __Y,
+                            __m512i __Z) {
+  return (__m512i)__builtin_ia32_selectq_512(
+      __M, (__v8di)_mm512_madd52lo_epu64(__X, __Y, __Z),
+      (__v8di)_mm512_setzero_si512());
 }
 
 #undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS_CONSTEXPR
 
 #endif
diff --git a/clang/lib/Headers/avxifmaintrin.h b/clang/lib/Headers/avxifmaintrin.h
index 5c782d2a5b865..1a9aaaf53affa 100644
--- a/clang/lib/Headers/avxifmaintrin.h
+++ b/clang/lib/Headers/avxifmaintrin.h
@@ -22,6 +22,14 @@
   __attribute__((__always_inline__, __nodebug__, __target__("avxifma"),        \
                  __min_vector_width__(256)))
 
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#else
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#endif
+
 // must vex-encoding
 
 /// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y
@@ -55,7 +63,7 @@
 /// ENDFOR
 /// dst[MAX:128] := 0
 /// \endcode
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_madd52hi_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
   return (__m128i)__builtin_ia32_vpmadd52huq128((__v2di)__X, (__v2di)__Y,
                                                 (__v2di)__Z);
@@ -92,7 +100,7 @@ _mm_madd52hi_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
 /// ENDFOR
 /// dst[MAX:256] := 0
 /// \endcode
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_madd52hi_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
   return (__m256i)__builtin_ia32_vpmadd52huq256((__v4di)__X, (__v4di)__Y,
                                                 (__v4di)__Z);
@@ -129,7 +137,7 @@ _mm256_madd52hi_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
 /// ENDFOR
 /// dst[MAX:128] := 0
 /// \endcode
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_madd52lo_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
   return (__m128i)__builtin_ia32_vpmadd52luq128((__v2di)__X, (__v2di)__Y,
                                                 (__v2di)__Z);
@@ -166,12 +174,14 @@ _mm_madd52lo_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
 /// ENDFOR
 /// dst[MAX:256] := 0
 /// \endcode
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_madd52lo_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
   return (__m256i)__builtin_ia32_vpmadd52luq256((__v4di)__X, (__v4di)__Y,
                                                 (__v4di)__Z);
 }
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS256
+#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
 
 #endif // __AVXIFMAINTRIN_H
diff --git a/clang/test/AST/ByteCode/x86-ifma-constexpr.cpp b/clang/test/AST/ByteCode/x86-ifma-constexpr.cpp
new file mode 100644
index 0000000000000..72dde7d27b3ec
--- /dev/null
+++ b/clang/test/AST/ByteCode/x86-ifma-constexpr.cpp
@@ -0,0 +1,36 @@
+// RUN: %clang_cc1 -fexperimental-new-constant-interpreter -std=c++2a -fsyntax-only \
+// RUN:   -triple x86_64-unknown-unknown -target-feature +avxifma -ffreestanding \
+// RUN:   -verify %s
+
+// Test constexpr evaluation of X86 IFMA intrinsics with the ByteCode interpreter.
+
+typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16)));
+
+// Declare required IFMA builtin functions.
+extern "C" {
+__m128i __builtin_ia32_vpmadd52luq128(__m128i, __m128i, __m128i);
+__m128i __builtin_ia32_vpmadd52huq128(__m128i, __m128i, __m128i);
+}
+
+// Intrinsic wrapper functions.
+static constexpr __inline__ __m128i __attribute__((__always_inline__, __nodebug__, __target__("avxifma")))
+_mm_madd52lo_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
+  return __builtin_ia32_vpmadd52luq128(__X, __Y, __Z);
+}
+
+// Simple test to check if IFMA intrinsics can be used in constexpr context
+constexpr bool test_basic_ifma() {
+  __m128i a = (__m128i){5ULL, 3ULL};
+  __m128i b = (__m128i){7ULL, 4ULL};
+  __m128i c = (__m128i){2ULL, 1ULL};
+
+  // Just test that we can call the intrinsic in constexpr context
+  __m128i result = _mm_madd52lo_epu64(a, b, c);
+  (void)result; // Suppress unused variable warning
+  return true;
+}
+
+// Basic test to verify constexpr evaluation works
+static_assert(test_basic_ifma(), "Basic IFMA constexpr test failed");
+
+// expected-no-diagnostics
\ No newline at end of file
diff --git a/clang/test/Sema/x86-ifma-constexpr.cpp b/clang/test/Sema/x86-ifma-constexpr.cpp
new file mode 100644
index 0000000000000..eb1fca13dfea5
--- /dev/null
+++ b/clang/test/Sema/x86-ifma-constexpr.cpp
@@ -0,0 +1,262 @@
+// RUN: %clang_cc1 -std=c++2a -fsyntax-only -triple x86_64-unknown-unknown \
+// RUN:   -target-feature +avxifma -ffreestanding -verify %s
+
+typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16)));
+typedef long long __m256i __attribute__((__vector_size__(32), __aligned__(32)));
+typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));
+
+typedef unsigned long long __v2du __attribute__((__vector_size__(16)));
+typedef unsigned long long __v4du __attribute__((__vector_size__(32)));
+typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
+
+extern "C" {
+__m128i __builtin_ia32_vpmadd52luq128(__m128i, __m128i, __m128i);
+__m128i __builtin_ia32_vpmadd52huq128(__m128i, __m128i, __m128i);
+__m256i __builtin_ia32_vpmadd52luq256(__m256i, __m256i, __m256i);
+__m256i __builtin_ia32_vpmadd52huq256(__m256i, __m256i, __m256i);
+__m512i __builtin_ia32_vpmadd52luq512(__m512i, __m512i, __m512i);
+__m512i __builtin_ia32_vpmadd52huq512(__m512i, __m512i, __m512i);
+}
+static constexpr __inline__ __m128i __attribute__((__always_inline__, __nodebug__, __target__("avxifma")))
+_mm_madd52lo_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
+  return __builtin_ia32_vpmadd52luq128(__X, __Y, __Z);
+}
+
+static constexpr __inline__ __m128i __attribute__((__always_inline__, __nodebug__, __target__("avxifma")))
+_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
+  return __builtin_ia32_vpmadd52huq128(__X, __Y, __Z);
+}
+
+static constexpr __inline__ __m256i __attribute__((__always_inline__, __nodebug__, __target__("avxifma")))
+_mm256_madd52lo_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
+  return __builtin_ia32_vpmadd52luq256(__X, __Y, __Z);
+}
+
+static constexpr __inline__ __m256i __attribute__((__always_inline__, __nodebug__, __target__("avxifma")))
+_mm256_madd52hi_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
+  return __builtin_ia32_vpmadd52huq256(__X, __Y, __Z);
+}
+
+static constexpr __inline__ __m512i __attribute__((__always_inline__, __nodebug__, __target__("avxifma")))
+_mm512_madd52lo_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
+  return __builtin_ia32_vpmadd52luq512(__X, __Y, __Z);
+}
+
+static constexpr __inline__ __m512i __attribute__((__always_inline__, __nodebug__, __target__("avxifma")))
+_mm512_madd52hi_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
+  return __builtin_ia32_vpmadd52huq512(__X, __Y, __Z);
+}
+
+#define TEST_CONSTEXPR(expr) static_assert(expr, "constexpr test failed")
+
+constexpr bool match_v2du(__m128i result, unsigned long long e0, unsigned long long e1) {
+  __v2du v = (__v2du)result;
+  return v[0] == e0 && v[1] == e1;
+}
+
+constexpr bool match_v4du(__m256i result, unsigned long long e0, unsigned long long e1,
+                          unsigned long long e2, unsigned long long e3) {
+  __v4du v = (__v4du)result;
+  return v[0] == e0 && v[1] == e1 && v[2] == e2 && v[3] == e3;
+}
+
+constexpr bool match_v8du(__m512i result, unsigned long long e0, unsigned long long e1,
+                          unsigned long long e2, unsigned long long e3,
+                          unsigned long long e4, unsigned long long e5,
+                          unsigned long long e6, unsigned long long ...
[truncated]

Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please take a look at the Intel Intrinsics Guide to get a better understanding of the madd52 instructions

Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing avx512ifmavl-builtins.c coverage

@RKSimon RKSimon requested a review from XChy September 30, 2025 07:49
@chaitanyav chaitanyav force-pushed the issue_160498 branch 2 times, most recently from f11c3bd to 5bddd56 Compare September 30, 2025 16:50
@RKSimon RKSimon self-requested a review October 2, 2025 09:16
Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

avx512ifmavlintrin.h ?

RKSimon added a commit that referenced this pull request Oct 2, 2025
… intrinsics test coverage (#161684)

Inspired by test problems encountered on #161056
Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should now be able to clean up your tests and get rid of the macosx hacks.

mahesh-attarde pushed a commit to mahesh-attarde/llvm-project that referenced this pull request Oct 3, 2025
… intrinsics test coverage (llvm#161684)

Inspired by test problems encountered on llvm#161056
@chaitanyav chaitanyav requested review from RKSimon and XChy October 3, 2025 05:22
Copy link
Member

@XChy XChy left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

APInt calculation looks good to me. Please wait for @RKSimon to approve the frontend-specific part.

Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

a couple of minors

@chaitanyav chaitanyav force-pushed the issue_160498 branch 2 times, most recently from 3fb4fb0 to 314265f Compare October 8, 2025 03:45
@chaitanyav
Copy link
Contributor Author

@RKSimon I have addressed the comments. Also, I had to use __attribute__((target("avx512ifma,avx512vl"))) in avxifma-builtins.c for those 4 tests, since we specify -target-feature +avxifma in that test file.

#include "builtin_test_helpers.h"


__attribute__((target("avx512ifma,avx512vl")))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand why this is necessary

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+ /mnt/f/llvm-project/build/bin/FileCheck /mnt/f/llvm-project/clang/test/CodeGen/X86/avxifma-builtins.c
/mnt/f/llvm-project/clang/test/CodeGen/X86/avxifma-builtins.c:18:10: error: always_inline function '_mm_madd52hi_epu64' requires target feature 'avx512ifma', but would be inlined into function 'test_mm_madd52hi_epu64' that is compiled without support for 'avx512ifma'
   18 |   return _mm_madd52hi_epu64(__X, __Y, __Z);
      |          ^
1 error generated.
FileCheck error: '<stdin>' is empty.
FileCheck command line:  /mnt/f/llvm-project/build/bin/FileCheck /mnt/f/llvm-project/clang/test/CodeGen/X86/avxifma-builtins.c

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fact that it didn't require this beforehand is very suspicious, but I can't see the mistake right now. @FreddyLeaf @phoebewang any ideas?

Copy link
Contributor Author

@chaitanyav chaitanyav Oct 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I understand why: in clang/lib/Headers/avx512ifmavlintrin.h it was previously a macro, and now it's a static function. That is why we need the __attribute__.

#define _mm_madd52hi_epu64(X, Y, Z)                                            \
  ((__m128i)__builtin_ia32_vpmadd52huq128((__v2di)(X), (__v2di)(Y),            \
                                          (__v2di)(Z)))
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
  return (__m128i)__builtin_ia32_vpmadd52huq128((__v2di)__X, (__v2di)__Y,
                                                (__v2di)__Z);
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We cannot change it to static functions directly. I have a workaround that may help here: #162760

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will rebase and resolve once those changes are merged.

…Allow AVX/AVX512 IFMA madd52 intrinsics to be used in constexpr

Resolves llvm#160498
Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM - cheers

@RKSimon RKSimon enabled auto-merge (squash) October 14, 2025 13:21
@RKSimon RKSimon merged commit b110b7d into llvm:main Oct 14, 2025
9 of 10 checks passed
akadutta pushed a commit to akadutta/llvm-project that referenced this pull request Oct 14, 2025
…Allow AVX/AVX512 IFMA madd52 intrinsics to be used in constexpr (llvm#161056)

Resolves llvm#160498
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:X86 clang:bytecode Issues for the clang bytecode constexpr interpreter clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:headers Headers provided by Clang, e.g. for intrinsics clang Clang issues not falling into any other category

Projects

None yet

Development

Successfully merging this pull request may close these issues.

[X86][Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow AVX/AVX512 IFMA madd52 intrinsics to be used in constexpr

6 participants